[XenPPC] [xenppc-unstable] [POWERPC] merge with http://xenbits.xensource.com/xen-unstable.hg
# HG changeset patch
# User Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
# Node ID 5f92043a3ab181b6f21f5816cd5d4d0fa74eefde
# Parent bb510c274af8991aaa53af469d7f50efa134c5fd
# Parent befab551b0e13c70f11a8b2192e126ab4de47439
[POWERPC] merge with http://xenbits.xensource.com/xen-unstable.hg
---
 extras/mini-os/include/hypercall-x86_32.h | 326 -----
 extras/mini-os/include/hypercall-x86_64.h | 326 -----
 extras/mini-os/include/os.h | 561 ---------
 extras/mini-os/include/spinlock.h | 121 --
 extras/mini-os/include/traps.h | 73 -
 extras/mini-os/traps.c | 229 ----
 buildconfigs/linux-defconfig_xen0_ia64 | 6
 buildconfigs/linux-defconfig_xen0_x86_32 | 1
 buildconfigs/linux-defconfig_xen0_x86_64 | 1
 buildconfigs/linux-defconfig_xenU_ia64 | 1
 buildconfigs/linux-defconfig_xen_ia64 | 6
 buildconfigs/linux-defconfig_xen_x86_32 | 1
 buildconfigs/linux-defconfig_xen_x86_64 | 1
 docs/src/user.tex | 3
 extras/mini-os/Makefile | 53
 extras/mini-os/arch/x86/traps.c | 229 ++++
 extras/mini-os/include/hypervisor.h | 3
 extras/mini-os/include/types.h | 4
 extras/mini-os/include/x86/os.h | 561 +++++++++
 extras/mini-os/include/x86/spinlock.h | 121 ++
 extras/mini-os/include/x86/traps.h | 73 +
 extras/mini-os/include/x86/x86_32/hypercall-x86_32.h | 326 +++++
 extras/mini-os/include/x86/x86_64/hypercall-x86_64.h | 326 +++++
 linux-2.6-xen-sparse/arch/ia64/Kconfig | 28
 linux-2.6-xen-sparse/arch/ia64/kernel/gate.S | 2
 linux-2.6-xen-sparse/arch/ia64/xen/Makefile | 6
 linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S | 41
 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c | 5
 linux-2.6-xen-sparse/arch/ia64/xen/util.c | 23
 linux-2.6-xen-sparse/drivers/xen/Kconfig | 11
 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 9
 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c | 21
 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c | 6
 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 55
 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c | 6
 linux-2.6-xen-sparse/drivers/xen/console/console.c | 4
 linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 4
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 12
 linux-2.6-xen-sparse/drivers/xen/pciback/Makefile | 1
 linux-2.6-xen-sparse/drivers/xen/pciback/slot.c | 151 ++
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 67 -
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h | 3
 linux-2.6-xen-sparse/include/asm-ia64/agp.h | 12
 linux-2.6-xen-sparse/include/asm-ia64/dma-mapping.h | 22
 linux-2.6-xen-sparse/include/asm-ia64/hypercall.h | 23
 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h | 16
 linux-2.6-xen-sparse/include/asm-ia64/io.h | 8
 linux-2.6-xen-sparse/include/asm-ia64/machvec.h | 2
 linux-2.6-xen-sparse/include/asm-ia64/page.h | 10
 tools/examples/xend-config.sxp | 2
 tools/examples/xmexample.hvm | 4
 tools/firmware/Makefile | 2
 tools/firmware/acpi/acpi_fadt.h | 3
 tools/firmware/hvmloader/hvmloader.c | 9
 tools/ioemu/hw/pc.c | 17
 tools/ioemu/hw/piix4acpi.c | 4
 tools/ioemu/patches/acpi-support | 48
 tools/ioemu/patches/acpi-timer-support | 4
 tools/ioemu/patches/domain-destroy | 10
 tools/ioemu/patches/domain-reset | 10
 tools/ioemu/patches/domain-timeoffset | 8
 tools/ioemu/patches/qemu-target-i386-dm | 25
 tools/ioemu/patches/series | 2
 tools/ioemu/patches/vnc-display-find-unused | 101 +
 tools/ioemu/patches/xen-support-buffered-ioreqs | 77 +
 tools/ioemu/patches/xenstore-block-device-config | 49
 tools/ioemu/patches/xenstore-write-vnc-port | 12
 tools/ioemu/target-i386-dm/helper2.c | 107 +
 tools/ioemu/vl.c | 20
 tools/ioemu/vl.h | 5
 tools/ioemu/vnc.c | 17
 tools/libxc/ia64/xc_ia64_hvm_build.c | 30
 tools/libxc/xc_hvm_build.c | 57 -
 tools/libxc/xc_linux_build.c | 10
 tools/libxc/xc_linux_restore.c | 2
 tools/pygrub/src/GrubConf.py | 72 -
 tools/pygrub/src/pygrub | 541 ++++++---
 tools/python/xen/lowlevel/xc/xc.c | 46
 tools/python/xen/util/xmlrpclib2.py | 5
 tools/python/xen/xend/XendDomain.py | 4
 tools/python/xen/xend/image.py | 11
 tools/python/xen/xend/sxp.py | 2
 tools/python/xen/xend/tests/test_sxp.py | 21
 tools/python/xen/xend/tests/xend-config.sxp | 132 ++
 tools/python/xen/xm/create.py | 61 -
 tools/python/xen/xm/main.py | 69 +
 tools/python/xen/xm/tests/test_create.py | 3
 tools/xenstat/libxenstat/src/xenstat.c | 101 +
 tools/xm-test/lib/XmTestLib/XenDevice.py | 1
 xen/arch/ia64/Rules.mk | 4
 xen/arch/ia64/asm-offsets.c | 8
 xen/arch/ia64/vmx/mmio.c | 20
 xen/arch/ia64/vmx/vmmu.c | 59 -
 xen/arch/ia64/vmx/vmx_entry.S | 5
 xen/arch/ia64/vmx/vmx_hypercall.c | 220 ---
 xen/arch/ia64/vmx/vmx_ivt.S | 68 -
 xen/arch/ia64/vmx/vmx_minstate.h | 7
 xen/arch/ia64/vmx/vmx_phy_mode.c | 47
 xen/arch/ia64/vmx/vmx_process.c | 36
 xen/arch/ia64/vmx/vmx_vcpu.c | 9
 xen/arch/ia64/vmx/vmx_virt.c | 9
 xen/arch/ia64/vmx/vtlb.c | 194 +--
 xen/arch/ia64/xen/dom0_ops.c | 134 --
 xen/arch/ia64/xen/dom_fw.c | 81 -
 xen/arch/ia64/xen/domain.c | 134 --
 xen/arch/ia64/xen/faults.c | 28
 xen/arch/ia64/xen/hypercall.c | 14
 xen/arch/ia64/xen/hyperprivop.S | 173 +--
 xen/arch/ia64/xen/mm.c | 179 ---
 xen/arch/ia64/xen/privop.c | 44
 xen/arch/ia64/xen/privop_stat.c | 249 ----
 xen/arch/ia64/xen/vcpu.c | 20
 xen/arch/ia64/xen/vhpt.c | 5
 xen/arch/ia64/xen/xensetup.c | 14
 xen/arch/powerpc/setup.c | 2
 xen/arch/x86/domain.c | 9
 xen/arch/x86/flushtlb.c | 4
 xen/arch/x86/hvm/hvm.c | 69 +
 xen/arch/x86/hvm/intercept.c | 68 +
 xen/arch/x86/hvm/io.c | 89 -
 xen/arch/x86/hvm/platform.c | 54
 xen/arch/x86/hvm/svm/intr.c | 15
 xen/arch/x86/hvm/svm/svm.c | 6
 xen/arch/x86/hvm/svm/vmcb.c | 34
 xen/arch/x86/hvm/svm/x86_32/exits.S | 3
 xen/arch/x86/hvm/svm/x86_64/exits.S | 2
 xen/arch/x86/hvm/vioapic.c | 12
 xen/arch/x86/hvm/vlapic.c | 69 -
 xen/arch/x86/hvm/vmx/io.c | 28
 xen/arch/x86/hvm/vmx/vmcs.c | 12
 xen/arch/x86/hvm/vmx/vmx.c | 131 +-
 xen/arch/x86/hvm/vmx/x86_32/exits.S | 3
 xen/arch/x86/hvm/vmx/x86_64/exits.S | 2
 xen/arch/x86/irq.c | 87 -
 xen/arch/x86/mm.c | 567 +---------
 xen/arch/x86/nmi.c | 38
 xen/arch/x86/time.c | 64 -
 xen/arch/x86/traps.c | 46
 xen/arch/x86/x86_32/domain_page.c | 3
 xen/arch/x86/x86_32/traps.c | 2
 xen/arch/x86/x86_32/xen.lds.S | 3
 xen/arch/x86/x86_64/xen.lds.S | 3
 xen/common/domain.c | 11
 xen/common/grant_table.c | 221 +++
 xen/common/multicall.c | 4
 xen/common/page_alloc.c | 2
 xen/common/sched_bvt.c | 16
 xen/common/sched_credit.c | 31
 xen/common/sched_sedf.c | 21
 xen/common/schedule.c | 65 -
 xen/common/timer.c | 86 -
 xen/common/trace.c | 17
 xen/drivers/char/console.c | 2
 xen/include/asm-ia64/bundle.h | 2
 xen/include/asm-ia64/config.h | 3
 xen/include/asm-ia64/domain.h | 2
 xen/include/asm-ia64/grant_table.h | 18
 xen/include/asm-ia64/linux-xen/asm/cache.h | 2
 xen/include/asm-ia64/linux-xen/asm/io.h | 6
 xen/include/asm-ia64/linux-xen/asm/pgtable.h | 3
 xen/include/asm-ia64/mm.h | 8
 xen/include/asm-ia64/perfc_defn.h | 43
 xen/include/asm-ia64/privop_stat.h | 44
 xen/include/asm-ia64/shadow.h | 2
 xen/include/asm-ia64/vhpt.h | 5
 xen/include/asm-ia64/vmmu.h | 7
 xen/include/asm-ia64/vmx_platform.h | 2
 xen/include/asm-ia64/vmx_vcpu.h | 3
 xen/include/asm-ia64/vmx_vpd.h | 3
 xen/include/asm-powerpc/cache.h | 2
 xen/include/asm-powerpc/flushtlb.h | 15
 xen/include/asm-powerpc/mm.h | 6
 xen/include/asm-x86/cache.h | 2
 xen/include/asm-x86/current.h | 2
 xen/include/asm-x86/domain.h | 3
 xen/include/asm-x86/e820.h | 1
 xen/include/asm-x86/flushtlb.h | 16
 xen/include/asm-x86/hvm/domain.h | 2
 xen/include/asm-x86/hvm/hvm.h | 12
 xen/include/asm-x86/hvm/io.h | 3
 xen/include/asm-x86/hvm/support.h | 8
 xen/include/asm-x86/hvm/vcpu.h | 2
 xen/include/asm-x86/hvm/vioapic.h | 5
 xen/include/asm-x86/hvm/vmx/vmx.h | 359 +++---
 xen/include/asm-x86/mm.h | 54
 xen/include/asm-x86/perfc.h | 12
 xen/include/public/arch-ia64.h | 2
 xen/include/public/arch-powerpc.h | 1
 xen/include/public/arch-x86_32.h | 1
 xen/include/public/arch-x86_64.h | 1
 xen/include/public/grant_table.h | 47
 xen/include/public/hvm/hvm_info_table.h | 2
 xen/include/public/hvm/ioreq.h | 9
 xen/include/public/xen.h | 6
 xen/include/xen/config.h | 1
 xen/include/xen/event.h | 1
 xen/include/xen/mm.h | 4
 xen/include/xen/multicall.h | 5
 xen/include/xen/percpu.h | 1
 xen/include/xen/sched-if.h | 10
 200 files changed, 5118 insertions(+), 4896 deletions(-)

diff -r bb510c274af8 -r 5f92043a3ab1 buildconfigs/linux-defconfig_xen0_ia64
--- a/buildconfigs/linux-defconfig_xen0_ia64 Fri Aug 11 13:30:48 2006 -0400
+++ b/buildconfigs/linux-defconfig_xen0_ia64 Sun Aug 13 12:00:38 2006 -0400
@@ -91,7 +91,6 @@ CONFIG_EFI=y
 CONFIG_EFI=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_XEN=y
-CONFIG_XEN_IA64_DOM0_VP=y
 CONFIG_XEN_IA64_VDSO_PARAVIRT=y
 CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 CONFIG_DMA_IS_DMA32=y
@@ -118,7 +117,7 @@ CONFIG_IOSAPIC=y
 CONFIG_IOSAPIC=y
 CONFIG_FORCE_MAX_ZONEORDER=11
 CONFIG_SMP=y
-CONFIG_NR_CPUS=4
+CONFIG_NR_CPUS=16
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_SCHED_SMT is not set
 # CONFIG_PREEMPT is not set
@@ -1533,8 +1532,9 @@ CONFIG_XEN_BLKDEV_BACKEND=y
 # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
 CONFIG_XEN_NETDEV_LOOPBACK=y
 CONFIG_XEN_PCIDEV_BACKEND=y
-CONFIG_XEN_PCIDEV_BACKEND_VPCI=y
+# CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set
 # CONFIG_XEN_PCIDEV_BACKEND_PASS is not set
+CONFIG_XEN_PCIDEV_BACKEND_SLOT=y
 # CONFIG_XEN_PCIDEV_BE_DEBUG is not set
 # CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
diff -r bb510c274af8 -r 5f92043a3ab1 buildconfigs/linux-defconfig_xen0_x86_32
--- a/buildconfigs/linux-defconfig_xen0_x86_32 Fri Aug 11 13:30:48 2006 -0400
+++ b/buildconfigs/linux-defconfig_xen0_x86_32 Sun Aug 13 12:00:38 2006 -0400
@@ -1320,6 +1320,7 @@ CONFIG_XEN_PCIDEV_BACKEND=y
 CONFIG_XEN_PCIDEV_BACKEND=y
 # CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set
 CONFIG_XEN_PCIDEV_BACKEND_PASS=y
+# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set
 # CONFIG_XEN_PCIDEV_BE_DEBUG is not set
 CONFIG_XEN_BLKDEV_BACKEND=y
 CONFIG_XEN_BLKDEV_TAP=y
diff -r bb510c274af8 -r 5f92043a3ab1 buildconfigs/linux-defconfig_xen0_x86_64
--- a/buildconfigs/linux-defconfig_xen0_x86_64 Fri Aug 11 13:30:48 2006 -0400
+++ b/buildconfigs/linux-defconfig_xen0_x86_64 Sun Aug 13 12:00:38 2006 -0400
@@ -1261,6 +1261,7 @@ CONFIG_XEN_PCIDEV_BACKEND=y
 CONFIG_XEN_PCIDEV_BACKEND=y
 # CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set
 CONFIG_XEN_PCIDEV_BACKEND_PASS=y
+# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set
 # CONFIG_XEN_PCIDEV_BE_DEBUG is not set
 CONFIG_XEN_BLKDEV_BACKEND=y
 CONFIG_XEN_BLKDEV_TAP=y
diff -r bb510c274af8 -r 5f92043a3ab1 buildconfigs/linux-defconfig_xenU_ia64
--- a/buildconfigs/linux-defconfig_xenU_ia64 Fri Aug 11 13:30:48 2006 -0400
+++ b/buildconfigs/linux-defconfig_xenU_ia64 Sun Aug 13 12:00:38 2006 -0400
@@ -88,7 +88,6 @@ CONFIG_EFI=y
 CONFIG_EFI=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_XEN=y
-CONFIG_XEN_IA64_DOM0_VP=y
 CONFIG_XEN_IA64_VDSO_PARAVIRT=y
 CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 CONFIG_DMA_IS_DMA32=y
diff -r bb510c274af8 -r 5f92043a3ab1
buildconfigs/linux-defconfig_xen_ia64 --- a/buildconfigs/linux-defconfig_xen_ia64 Fri Aug 11 13:30:48 2006 -0400 +++ b/buildconfigs/linux-defconfig_xen_ia64 Sun Aug 13 12:00:38 2006 -0400 @@ -91,7 +91,6 @@ CONFIG_EFI=y CONFIG_EFI=y CONFIG_GENERIC_IOMAP=y CONFIG_XEN=y -CONFIG_XEN_IA64_DOM0_VP=y CONFIG_XEN_IA64_VDSO_PARAVIRT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_IS_DMA32=y @@ -118,7 +117,7 @@ CONFIG_IOSAPIC=y CONFIG_IOSAPIC=y CONFIG_FORCE_MAX_ZONEORDER=11 CONFIG_SMP=y -CONFIG_NR_CPUS=4 +CONFIG_NR_CPUS=16 CONFIG_HOTPLUG_CPU=y # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set @@ -1539,8 +1538,9 @@ CONFIG_XEN_BLKDEV_BACKEND=y # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set CONFIG_XEN_NETDEV_LOOPBACK=y CONFIG_XEN_PCIDEV_BACKEND=y -CONFIG_XEN_PCIDEV_BACKEND_VPCI=y +# CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set # CONFIG_XEN_PCIDEV_BACKEND_PASS is not set +CONFIG_XEN_PCIDEV_BACKEND_SLOT=y # CONFIG_XEN_PCIDEV_BE_DEBUG is not set # CONFIG_XEN_TPMDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y diff -r bb510c274af8 -r 5f92043a3ab1 buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Fri Aug 11 13:30:48 2006 -0400 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Sun Aug 13 12:00:38 2006 -0400 @@ -3021,6 +3021,7 @@ CONFIG_XEN_PCIDEV_BACKEND=m CONFIG_XEN_PCIDEV_BACKEND=m CONFIG_XEN_PCIDEV_BACKEND_VPCI=y # CONFIG_XEN_PCIDEV_BACKEND_PASS is not set +# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set # CONFIG_XEN_PCIDEV_BE_DEBUG is not set CONFIG_XEN_BLKDEV_BACKEND=y CONFIG_XEN_BLKDEV_TAP=y diff -r bb510c274af8 -r 5f92043a3ab1 buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Fri Aug 11 13:30:48 2006 -0400 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Sun Aug 13 12:00:38 2006 -0400 @@ -2853,6 +2853,7 @@ CONFIG_XEN_PCIDEV_BACKEND=m CONFIG_XEN_PCIDEV_BACKEND=m # CONFIG_XEN_PCIDEV_BACKEND_VPCI is not set CONFIG_XEN_PCIDEV_BACKEND_PASS=y +# CONFIG_XEN_PCIDEV_BACKEND_SLOT is not set # CONFIG_XEN_PCIDEV_BE_DEBUG is not set CONFIG_XEN_BLKDEV_BACKEND=y CONFIG_XEN_BLKDEV_TAP=y diff -r bb510c274af8 -r 5f92043a3ab1 docs/src/user.tex --- a/docs/src/user.tex Fri Aug 11 13:30:48 2006 -0400 +++ b/docs/src/user.tex Sun Aug 13 12:00:38 2006 -0400 @@ -1089,6 +1089,9 @@ The \path{xm list} command also supports The \path{xm list} command also supports a long output format when the \path{-l} switch is used. This outputs the full details of the running domains in \xend's SXP configuration format. + +If you want to know how long your domains have been running for, then +you can use the \verb_# xm uptime_ command. You can get access to the console of a particular domain using diff -r bb510c274af8 -r 5f92043a3ab1 extras/mini-os/Makefile --- a/extras/mini-os/Makefile Fri Aug 11 13:30:48 2006 -0400 +++ b/extras/mini-os/Makefile Sun Aug 13 12:00:38 2006 -0400 @@ -11,26 +11,54 @@ CFLAGS := -fno-builtin -Wall -Werror -Wr CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline -override CPPFLAGS := -Iinclude $(CPPFLAGS) ASFLAGS = -D__ASSEMBLY__ LDLIBS = -L. -lminios LDFLAGS := -N -T minios-$(TARGET_ARCH).lds +# For possible special source directories. +EXTRA_SRC = +# For possible special header directories. +EXTRA_INC = + +# Standard name for architecture specific subdirectories. +TARGET_ARCH_DIR = $(TARGET_ARCH) +# This is used for architecture specific links. 
+ARCH_LINKS = + ifeq ($(TARGET_ARCH),x86_32) CFLAGS += -m32 -march=i686 LDFLAGS += -m elf_i386 +TARGET_ARCH_DIR = x86 +EXTRA_INC += $(TARGET_ARCH_DIR)/$(TARGET_ARCH) +EXTRA_SRC += arch/$(EXTRA_INC) endif ifeq ($(TARGET_ARCH)$(pae),x86_32y) CFLAGS += -DCONFIG_X86_PAE=1 ASFLAGS += -DCONFIG_X86_PAE=1 +TARGET_ARCH_DIR = x86 +EXTRA_INC += $(TARGET_ARCH_DIR)/$(TARGET_ARCH) +EXTRA_SRC += arch/$(EXTRA_INC) endif ifeq ($(TARGET_ARCH),x86_64) CFLAGS += -m64 -mno-red-zone -fpic -fno-reorder-blocks CFLAGS += -fno-asynchronous-unwind-tables LDFLAGS += -m elf_x86_64 +TARGET_ARCH_DIR = x86 +EXTRA_INC += $(TARGET_ARCH_DIR)/$(TARGET_ARCH) +EXTRA_SRC += arch/$(EXTRA_INC) +endif + +ifeq ($(TARGET_ARCH),ia64) +CFLAGS += -mfixed-range=f12-f15,f32-f127 +ASFLAGS += -x assembler-with-cpp -ansi -Wall +ASFLAGS += -mfixed-range=f12-f15,f32-f127 +ARCH_LINKS = IA64_LINKS # Special link on ia64 needed +define arch_links +[ -e include/ia64/asm-xsi-offsets.h ] || ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/ia64/asm-xsi-offsets.h +endef endif ifeq ($(debug),y) @@ -39,6 +67,10 @@ CFLAGS += -O3 CFLAGS += -O3 endif +# Add the special header directories to the include paths. +extra_incl := $(foreach dir,$(EXTRA_INC),-Iinclude/$(dir)) +override CPPFLAGS := -Iinclude $(CPPFLAGS) -Iinclude/$(TARGET_ARCH_DIR) $(extra_incl) + TARGET := mini-os HEAD := $(TARGET_ARCH).o @@ -46,15 +78,32 @@ OBJS += $(patsubst %.c,%.o,$(wildcard li OBJS += $(patsubst %.c,%.o,$(wildcard lib/*.c)) OBJS += $(patsubst %.c,%.o,$(wildcard xenbus/*.c)) OBJS += $(patsubst %.c,%.o,$(wildcard console/*.c)) +OBJS += $(patsubst %.S,%.o,$(wildcard arch/$(TARGET_ARCH_DIR)/*.S)) +OBJS += $(patsubst %.c,%.o,$(wildcard arch/$(TARGET_ARCH_DIR)/*.c)) +# For special wanted source directories. +extra_objs := $(foreach dir,$(EXTRA_SRC),$(patsubst %.c,%.o,$(wildcard $(dir)/*.c))) +OBJS += $(extra_objs) +extra_objs := $(foreach dir,$(EXTRA_SRC),$(patsubst %.S,%.o,$(wildcard $(dir)/*.S))) +OBJS += $(extra_objs) HDRS := $(wildcard include/*.h) HDRS += $(wildcard include/xen/*.h) +HDRS += $(wildcard include/$(TARGET_ARCH_DIR)/*.h) +# For special wanted header directories. +extra_heads := $(foreach dir,$(EXTRA_INC),$(wildcard $(dir)/*.h)) +HDRS += $(extra_heads) .PHONY: default default: $(TARGET) +# Create special architecture specific links. 
+ifneq ($(ARCH_LINKS),) +$(ARCH_LINKS): + $(arch_links) +endif + .PHONY: links -links: +links: $(ARCH_LINKS) [ -e include/xen ] || ln -sf ../../../xen/include/public include/xen libminios.a: links $(OBJS) $(HEAD) diff -r bb510c274af8 -r 5f92043a3ab1 extras/mini-os/include/hypervisor.h --- a/extras/mini-os/include/hypervisor.h Fri Aug 11 13:30:48 2006 -0400 +++ b/extras/mini-os/include/hypervisor.h Sun Aug 13 12:00:38 2006 -0400 @@ -7,6 +7,7 @@ * Copyright (c) 2002, K A Fraser * Copyright (c) 2005, Grzegorz Milos * Updates: Aravindh Puthiyaparambil <aravindh.puthiyaparambil@xxxxxxxxxx> + * Updates: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxxxxxxx> for ia64 */ #ifndef _HYPERVISOR_H_ @@ -19,6 +20,8 @@ #include <hypercall-x86_32.h> #elif defined(__x86_64__) #include <hypercall-x86_64.h> +#elif defined(__ia64__) +#include <hypercall-ia64.h> #else #error "Unsupported architecture" #endif diff -r bb510c274af8 -r 5f92043a3ab1 extras/mini-os/include/types.h --- a/extras/mini-os/include/types.h Fri Aug 11 13:30:48 2006 -0400 +++ b/extras/mini-os/include/types.h Sun Aug 13 12:00:38 2006 -0400 @@ -29,7 +29,7 @@ typedef unsigned int u32; #ifdef __i386__ typedef signed long long s64; typedef unsigned long long u64; -#elif defined(__x86_64__) +#elif defined(__x86_64__) || defined(__ia64__) typedef signed long s64; typedef unsigned long u64; #endif @@ -49,7 +49,7 @@ typedef struct { unsigned long pte_low, typedef struct { unsigned long pte_low, pte_high; } pte_t; #endif /* CONFIG_X86_PAE */ -#elif defined(__x86_64__) +#elif defined(__x86_64__) || defined(__ia64__) typedef long quad_t; typedef unsigned long u_quad_t; typedef unsigned long uintptr_t; diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/arch/ia64/Kconfig --- a/linux-2.6-xen-sparse/arch/ia64/Kconfig Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig Sun Aug 13 12:00:38 2006 -0400 @@ -56,20 +56,6 @@ config XEN help Enable Xen hypervisor support. Resulting kernel runs both as a guest OS on Xen and natively on hardware. 
- -config XEN_IA64_DOM0_VP - bool "dom0 vp model" - depends on XEN - default n - help - dom0 vp model - -config XEN_IA64_DOM0_NON_VP - bool - depends on XEN && !XEN_IA64_DOM0_VP - default y - help - dom0 P=M model config XEN_IA64_VDSO_PARAVIRT bool @@ -516,25 +502,25 @@ source "crypto/Kconfig" # if XEN config XEN_UTIL - default n if XEN_IA64_DOM0_VP + default n config HAVE_ARCH_ALLOC_SKB - default n if !XEN_IA64_DOM0_VP + default y config HAVE_ARCH_DEV_ALLOC_SKB - default n if !XEN_IA64_DOM0_VP + default y config XEN_BALLOON - default n if !XEN_IA64_DOM0_VP + default y config XEN_SKBUFF - default n if !XEN_IA64_DOM0_VP + default y config XEN_NETDEV_BACKEND - default n if !XEN_IA64_DOM0_VP + default y config XEN_NETDEV_FRONTEND - default n if !XEN_IA64_DOM0_VP + default y config XEN_DEVMEM default n diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/arch/ia64/kernel/gate.S --- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S Sun Aug 13 12:00:38 2006 -0400 @@ -130,7 +130,7 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) // r20 = 1 // r22 = &vcpu->evtchn_mask // r23 = &vpsr.ic - // r24 = vcpu->pending_interruption + // r24 = &vcpu->pending_interruption // r25 = tmp // r28 = &running_on_xen // r30 = running_on_xen diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/arch/ia64/xen/Makefile --- a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile Sun Aug 13 12:00:38 2006 -0400 @@ -2,7 +2,7 @@ # Makefile for Xen components # -obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o +obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o \ + hypervisor.o pci-dma-xen.o util.o -obj-$(CONFIG_XEN_IA64_DOM0_VP) += hypervisor.o pci-dma-xen.o util.o -pci-dma-xen-$(CONFIG_XEN_IA64_DOM0_VP) := ../../i386/kernel/pci-dma-xen.o +pci-dma-xen-y := ../../i386/kernel/pci-dma-xen.o diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S Sun Aug 13 12:00:38 2006 -0400 @@ -379,30 +379,35 @@ GLOBAL_ENTRY(xen_get_psr) ;; END(xen_get_psr) + // see xen_ssm_i() in privop.h + // r22 = &vcpu->evtchn_mask + // r23 = &vpsr.ic + // r24 = &vcpu->pending_interruption + // r25 = tmp + // r31 = tmp + // p11 = tmp + // p14 = tmp +#define XEN_SET_PSR_I \ + ld4 r31=[r22]; \ + ld4 r25=[r24]; \ + ;; \ + st4 [r22]=r0; \ + cmp.ne.unc p14,p0=r0,r31; \ + ;; \ +(p14) cmp.ne.unc p11,p0=r0,r25; \ + ;; \ +(p11) st4 [r22]=r20; \ +(p11) st4 [r23]=r0; \ +(p11) XEN_HYPER_SSM_I; + GLOBAL_ENTRY(xen_ssm_i_0) - st4 [r22]=r20 - ld4 r25=[r24] - ;; - cmp.ne.unc p11,p0=r0, r25 - ;; -(p11) st4 [r22]=r0 -(p11) st4 [r23]=r0 -(p11) XEN_HYPER_SSM_I - + XEN_SET_PSR_I brl.cond.sptk .vdso_ssm_i_0_ret ;; END(xen_ssm_i_0) GLOBAL_ENTRY(xen_ssm_i_1) - st4 [r22]=r20 - ld4 r25=[r24] - ;; - cmp.ne.unc p11,p0=r0, r25 - ;; -(p11) st4 [r22]=r0 -(p11) st4 [r23]=r0 -(p11) XEN_HYPER_SSM_I - ;; + XEN_SET_PSR_I brl.cond.sptk .vdso_ssm_i_1_ret ;; END(xen_ssm_i_1) diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Sun Aug 13 12:00:38 2006 -0400 @@ -35,6 +35,7 @@ EXPORT_SYMBOL(HYPERVISOR_shared_info); EXPORT_SYMBOL(HYPERVISOR_shared_info); start_info_t 
*xen_start_info; +EXPORT_SYMBOL(xen_start_info); int running_on_xen; EXPORT_SYMBOL(running_on_xen); @@ -91,6 +92,7 @@ ia64_xenmem_reservation_op(unsigned long } return ret; } +EXPORT_SYMBOL(ia64_xenmem_reservation_op); //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear() // move those to lib/contiguous_bitmap? @@ -379,12 +381,13 @@ HYPERVISOR_grant_table_op(unsigned int c return ____HYPERVISOR_grant_table_op(cmd, uop, count); } - +EXPORT_SYMBOL(HYPERVISOR_grant_table_op); /////////////////////////////////////////////////////////////////////////// // PageForeign(), SetPageForeign(), ClearPageForeign() struct address_space xen_ia64_foreign_dummy_mapping; +EXPORT_SYMBOL(xen_ia64_foreign_dummy_mapping); /////////////////////////////////////////////////////////////////////////// // foreign mapping diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/arch/ia64/xen/util.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/util.c Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/util.c Sun Aug 13 12:00:38 2006 -0400 @@ -54,7 +54,7 @@ struct vm_struct *alloc_vm_area(unsigned area->size = size; area->pages = NULL; //XXX area->nr_pages = nr_pages; - area->phys_addr = __pa(virt); + area->phys_addr = 0; /* xenbus_map_ring_valloc uses this field! */ return area; @@ -70,15 +70,13 @@ void free_vm_area(struct vm_struct *area { unsigned int order = get_order(area->size); unsigned long i; - - /* xenbus_map_ring_valloc overrides this field! */ - area->phys_addr = __pa(area->addr); + unsigned long phys_addr = __pa(area->addr); // This area is used for foreign page mappping. // So underlying machine page may not be assigned. for (i = 0; i < (1 << order); i++) { unsigned long ret; - unsigned long gpfn = (area->phys_addr >> PAGE_SHIFT) + i; + unsigned long gpfn = (phys_addr >> PAGE_SHIFT) + i; struct xen_memory_reservation reservation = { .nr_extents = 1, .address_bits = 0, @@ -107,21 +105,6 @@ void unlock_vm_area(struct vm_struct *ar } EXPORT_SYMBOL_GPL(unlock_vm_area); -#ifndef CONFIG_XEN_IA64_DOM0_VP -/* We just need a range of legal va here, though finally identity - * mapped one is instead used for gnttab mapping. - */ -unsigned long alloc_empty_foreign_map_page_range(unsigned long pages) -{ - struct vm_struct *vma; - - if ( (vma = get_vm_area(PAGE_SIZE * pages, VM_ALLOC)) == NULL ) - return NULL; - - return (unsigned long)vma->addr; -} -#endif - /* * Local variables: * c-file-style: "linux" diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/drivers/xen/Kconfig --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig Sun Aug 13 12:00:38 2006 -0400 @@ -117,7 +117,7 @@ config XEN_PCIDEV_BACKEND_VPCI This PCI Backend hides the true PCI topology and makes the frontend think there is a single PCI bus with only the exported devices on it. For example, a device at 03:05.0 will be re-assigned to 00:00.0. A - second device at 02:1a.0 will be re-assigned to 00:01.0. + second device at 02:1a.1 will be re-assigned to 00:01.1. config XEN_PCIDEV_BACKEND_PASS bool "Passthrough" @@ -129,6 +129,15 @@ config XEN_PCIDEV_BACKEND_PASS which depend on finding their hardward in certain bus/slot locations. +config XEN_PCIDEV_BACKEND_SLOT + bool "Slot" + ---help--- + This PCI Backend hides the true PCI topology and makes the frontend + think there is a single PCI bus with only the exported devices on it. + Contrary to the virtual PCI backend, a function becomes a new slot. 
+ For example, a device at 03:05.2 will be re-assigned to 00:00.0. A + second device at 02:1a.1 will be re-assigned to 00:01.0. + endchoice config XEN_PCIDEV_BE_DEBUG diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Sun Aug 13 12:00:38 2006 -0400 @@ -563,10 +563,14 @@ struct page *balloon_alloc_empty_page_ra set_xen_guest_handle(reservation.extent_start, &gmfn); ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + if (ret == -ENOSYS) + goto err; BUG_ON(ret != 1); } else { ret = apply_to_page_range(&init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL); + if (ret == -ENOSYS) + goto err; BUG_ON(ret); } current_pages -= 1UL << order; @@ -583,6 +587,11 @@ struct page *balloon_alloc_empty_page_ra set_page_count(page + i, 1); return page; + + err: + free_pages(vstart, order); + balloon_unlock(flags); + return NULL; } void balloon_dealloc_empty_page_range( diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Sun Aug 13 12:00:38 2006 -0400 @@ -398,14 +398,9 @@ static void dispatch_rw_block_io(blkif_t } pending_handle(pending_req, i) = map[i].handle; -#ifdef CONFIG_XEN_IA64_DOM0_NON_VP - pending_vaddrs[vaddr_pagenr(pending_req, i)] = - (unsigned long)gnttab_map_vaddr(map[i]); -#else set_phys_to_machine(__pa(vaddr( pending_req, i)) >> PAGE_SHIFT, FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); -#endif seg[i].buf = map[i].dev_bus_addr | (req->seg[i].first_sect << 9); } @@ -518,6 +513,12 @@ static int __init blkif_init(void) return -ENODEV; mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + + page = balloon_alloc_empty_page_range(mmap_pages); + if (page == NULL) + return -ENOMEM; + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + pending_reqs = kmalloc(sizeof(pending_reqs[0]) * blkif_reqs, GFP_KERNEL); pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * @@ -534,16 +535,6 @@ static int __init blkif_init(void) blkif_interface_init(); -#ifdef CONFIG_XEN_IA64_DOM0_NON_VP - extern unsigned long alloc_empty_foreign_map_page_range( - unsigned long pages); - mmap_vstart = (unsigned long) - alloc_empty_foreign_map_page_range(mmap_pages); -#else /* ! 
ia64 */ - page = balloon_alloc_empty_page_range(mmap_pages); - BUG_ON(page == NULL); - mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); -#endif printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n", __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart); BUG_ON(mmap_vstart == 0); diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Sun Aug 13 12:00:38 2006 -0400 @@ -75,12 +75,6 @@ static int map_frontend_page(blkif_t *bl blkif->shmem_ref = shared_page; blkif->shmem_handle = op.handle; - -#ifdef CONFIG_XEN_IA64_DOM0_NON_VP - /* on some arch's, map_grant_ref behaves like mmap, in that the - * passed address is a hint and a different address may be returned */ - blkif->blk_ring_area->addr = gnttab_map_vaddr(op); -#endif return 0; } diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sun Aug 13 12:00:38 2006 -0400 @@ -709,29 +709,18 @@ static void make_response(blkif_t *blkif /****************************************************************** * misc small helpers */ -/* FIXME: Return ENOMEM properly on failure to allocate additional reqs. */ -static void req_increase(void) +static int req_increase(void) { int i, j; struct page *page; unsigned long flags; + int ret; spin_lock_irqsave(&pending_free_lock, flags); + ret = -EINVAL; if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock) goto done; - - pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) * - blkif_reqs, GFP_KERNEL); - pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) * - mmap_pages, GFP_KERNEL); - - if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) { - kfree(pending_reqs[mmap_alloc]); - kfree(pending_addrs[mmap_alloc]); - WPRINTK("%s: out of memory\n", __FUNCTION__); - goto done; - } #ifdef __ia64__ extern unsigned long alloc_empty_foreign_map_page_range( @@ -740,7 +729,11 @@ static void req_increase(void) alloc_empty_foreign_map_page_range(mmap_pages); #else /* ! ia64 */ page = balloon_alloc_empty_page_range(mmap_pages); - BUG_ON(page == NULL); + ret = -ENOMEM; + if (page == NULL) { + printk("%s balloon_alloc_empty_page_range gave NULL\n", __FUNCTION__); + goto done; + } /* Pin all of the pages. 
*/ for (i=0; i<mmap_pages; i++) @@ -751,6 +744,23 @@ static void req_increase(void) mmap_start[mmap_alloc].mpage = page; #endif + + pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) * + blkif_reqs, GFP_KERNEL); + pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) * + mmap_pages, GFP_KERNEL); + + ret = -ENOMEM; + if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) { + kfree(pending_reqs[mmap_alloc]); + kfree(pending_addrs[mmap_alloc]); + WPRINTK("%s: out of memory\n", __FUNCTION__); + ret = -ENOMEM; + goto done; + } + + ret = 0; + DPRINTK("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n", __FUNCTION__, blkif_reqs, mmap_pages, mmap_start[mmap_alloc].start); @@ -774,7 +784,7 @@ static void req_increase(void) DPRINTK("# MMAPs increased to %d\n",mmap_alloc); done: spin_unlock_irqrestore(&pending_free_lock, flags); - + return ret; } static void mmap_req_del(int mmap) @@ -1299,13 +1309,8 @@ static void dispatch_rw_block_io(blkif_t = map[i].handle; pending_handle(mmap_idx, pending_idx, i/2).user = map[i+1].handle; -#ifdef CONFIG_XEN_IA64_DOM0_NON_VP - pending_addrs[mmap_idx][vaddr_pagenr(pending_req, i)] = - (unsigned long)gnttab_map_vaddr(map[i]); -#else set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); -#endif offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT; pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); ((struct page **)info->vma->vm_private_data)[offset] = @@ -1394,7 +1399,13 @@ static int __init blkif_init(void) return -ENODEV; INIT_LIST_HEAD(&pending_free); - for(i = 0; i < 2; i++) req_increase(); + for(i = 0; i < 2; i++) { + ret = req_increase(); + if (ret) + break; + } + if (i == 0) + return ret; tap_blkif_interface_init(); diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Sun Aug 13 12:00:38 2006 -0400 @@ -75,12 +75,6 @@ static int map_frontend_page(blkif_t *bl blkif->shmem_ref = shared_page; blkif->shmem_handle = op.handle; - -#ifdef CONFIG_XEN_IA64_DOM0_NON_VP - /* on some arch's, map_grant_ref behaves like mmap, in that the - * passed address is a hint and a different address may be returned */ - blkif->blk_ring_area->addr = gnttab_map_vaddr(op); -#endif return 0; } diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/drivers/xen/console/console.c --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Sun Aug 13 12:00:38 2006 -0400 @@ -178,7 +178,7 @@ static struct tty_driver *kcons_device(s static struct console kcons_info = { .device = kcons_device, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_ENABLED, .index = -1, }; @@ -192,8 +192,6 @@ static int __init xen_console_init(void) if (xc_mode == XC_DEFAULT) xc_mode = XC_SERIAL; kcons_info.write = kcons_write_dom0; - if (xc_mode == XC_SERIAL) - kcons_info.flags |= CON_ENABLED; } else { if (xc_mode == XC_DEFAULT) xc_mode = XC_TTY; diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sun Aug 13 12:00:38 2006 -0400 @@ -1306,7 +1306,9 @@ static int __init netback_init(void) net_timer.function = net_alarm; page = balloon_alloc_empty_page_range(MAX_PENDING_REQS); - BUG_ON(page == NULL); + if (page == 
NULL) + return -ENOMEM; + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); for (i = 0; i < MAX_PENDING_REQS; i++) { diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Sun Aug 13 12:00:38 2006 -0400 @@ -609,9 +609,14 @@ static void network_alloc_rx_buffers(str */ batch_target = np->rx_target - (req_prod - np->rx.rsp_cons); for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) { - /* Allocate an skb and a page. */ - skb = __dev_alloc_skb(RX_COPY_THRESHOLD, - GFP_ATOMIC | __GFP_NOWARN); + /* + * Allocate an skb and a page. Do not use __dev_alloc_skb as + * that will allocate page-sized buffers which is not + * necessary here. + * 16 bytes added as necessary headroom for netif_receive_skb. + */ + skb = alloc_skb(RX_COPY_THRESHOLD + 16, + GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) goto no_skb; @@ -628,6 +633,7 @@ no_skb: break; } + skb_reserve(skb, 16); /* mimic dev_alloc_skb() */ skb_shinfo(skb)->frags[0].page = page; skb_shinfo(skb)->nr_frags = 1; __skb_queue_tail(&np->rx_batch, skb); diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/drivers/xen/pciback/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/pciback/Makefile Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/Makefile Sun Aug 13 12:00:38 2006 -0400 @@ -7,6 +7,7 @@ pciback-y += conf_space.o conf_space_hea conf_space_capability_pm.o \ conf_space_quirks.o pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Sun Aug 13 12:00:38 2006 -0400 @@ -58,6 +58,8 @@ extern struct mutex xenwatch_mutex; static struct notifier_block *xenstore_chain; +static void wait_for_devices(struct xenbus_driver *xendrv); + /* If something in array of ids matches this device, return it. */ static const struct xenbus_device_id * match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) @@ -408,9 +410,18 @@ static int xenbus_register_driver_common int xenbus_register_frontend(struct xenbus_driver *drv) { + int ret; + drv->read_otherend_details = read_backend_details; - return xenbus_register_driver_common(drv, &xenbus_frontend); + ret = xenbus_register_driver_common(drv, &xenbus_frontend); + if (ret) + return ret; + + /* If this driver is loaded as a module wait for devices to attach. */ + wait_for_devices(drv); + + return 0; } EXPORT_SYMBOL_GPL(xenbus_register_frontend); @@ -1042,6 +1053,7 @@ static int is_disconnected_device(struct static int is_disconnected_device(struct device *dev, void *data) { struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; /* * A device with no driver will never connect. We care only about @@ -1050,18 +1062,27 @@ static int is_disconnected_device(struct if (!dev->driver) return 0; + /* Is this search limited to a particular driver? 
*/ + if (drv && (dev->driver != drv)) + return 0; + return (xendev->state != XenbusStateConnected); } -static int exists_disconnected_device(void) -{ - return bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, +static int exists_disconnected_device(struct device_driver *drv) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, is_disconnected_device); } static int print_device_status(struct device *dev, void *data) { struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; + + /* Is this operation limited to a particular driver? */ + if (drv && (dev->driver != drv)) + return 0; if (!dev->driver) { /* Information only: is this too noisy? */ @@ -1075,6 +1096,9 @@ static int print_device_status(struct de return 0; } + +/* We only wait for device setup after most initcalls have run. */ +static int ready_to_wait_for_devices; /* * On a 10 second timeout, wait for all devices currently configured. We need @@ -1090,20 +1114,29 @@ static int print_device_status(struct de * boot slightly, but of course needs tools or manual intervention to set up * those flags correctly. */ -static int __init wait_for_devices(void) +static void wait_for_devices(struct xenbus_driver *xendrv) { unsigned long timeout = jiffies + 10*HZ; - - if (!is_running_on_xen()) - return -ENODEV; - - while (time_before(jiffies, timeout) && exists_disconnected_device()) + struct device_driver *drv = xendrv ? &xendrv->driver : NULL; + + if (!ready_to_wait_for_devices || !is_running_on_xen()) + return; + + while (exists_disconnected_device(drv)) { + if (time_after(jiffies, timeout)) + break; schedule_timeout_interruptible(HZ/10); - - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, + } + + bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, print_device_status); - - return 0; -} - -late_initcall(wait_for_devices); +} + +static int __init boot_wait_for_devices(void) +{ + ready_to_wait_for_devices = 1; + wait_for_devices(NULL); + return 0; +} + +late_initcall(boot_wait_for_devices); diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h Sun Aug 13 12:00:38 2006 -0400 @@ -51,7 +51,8 @@ static inline void switch_mm(struct mm_s struct mmuext_op _op[2], *op = _op; if (likely(prev != next)) { - BUG_ON(!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)); + BUG_ON(!xen_feature(XENFEAT_writable_page_tables) && + !test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)); /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/include/asm-ia64/agp.h --- a/linux-2.6-xen-sparse/include/asm-ia64/agp.h Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/agp.h Sun Aug 13 12:00:38 2006 -0400 @@ -19,21 +19,10 @@ #define flush_agp_cache() mb() /* Convert a physical address to an address suitable for the GART. */ -#ifndef CONFIG_XEN_IA64_DOM0_VP -#define phys_to_gart(x) (x) -#define gart_to_phys(x) (x) -#else #define phys_to_gart(x) phys_to_machine_for_dma(x) #define gart_to_phys(x) machine_to_phys_for_dma(x) -#endif /* GATT allocation. Returns/accepts GATT kernel virtual address. 
*/ -#ifndef CONFIG_XEN_IA64_DOM0_VP -#define alloc_gatt_pages(order) \ - ((char *)__get_free_pages(GFP_KERNEL, (order))) -#define free_gatt_pages(table, order) \ - free_pages((unsigned long)(table), (order)) -#else #include <asm/hypervisor.h> static inline char* alloc_gatt_pages(unsigned int order) @@ -57,6 +46,5 @@ free_gatt_pages(void* table, unsigned in xen_destroy_contiguous_region((unsigned long)table, order); free_pages((unsigned long)table, order); } -#endif /* CONFIG_XEN_IA64_DOM0_VP */ #endif /* _ASM_IA64_AGP_H */ diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/include/asm-ia64/dma-mapping.h --- a/linux-2.6-xen-sparse/include/asm-ia64/dma-mapping.h Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/dma-mapping.h Sun Aug 13 12:00:38 2006 -0400 @@ -7,28 +7,11 @@ */ #include <linux/config.h> #include <asm/machvec.h> -#ifdef CONFIG_XEN_IA64_DOM0_VP /* Needed for arch/i386/kernel/swiotlb.c and arch/i386/kernel/pci-dma-xen.c */ #include <asm/hypervisor.h> /* Needed for arch/i386/kernel/swiotlb.c */ #include <asm-i386/mach-xen/asm/swiotlb.h> -#endif -#ifndef CONFIG_XEN_IA64_DOM0_VP -#define dma_alloc_coherent platform_dma_alloc_coherent -#define dma_alloc_noncoherent platform_dma_alloc_coherent /* coherent mem. is cheap */ -#define dma_free_coherent platform_dma_free_coherent -#define dma_free_noncoherent platform_dma_free_coherent -#define dma_map_single platform_dma_map_single -#define dma_map_sg platform_dma_map_sg -#define dma_unmap_single platform_dma_unmap_single -#define dma_unmap_sg platform_dma_unmap_sg -#define dma_sync_single_for_cpu platform_dma_sync_single_for_cpu -#define dma_sync_sg_for_cpu platform_dma_sync_sg_for_cpu -#define dma_sync_single_for_device platform_dma_sync_single_for_device -#define dma_sync_sg_for_device platform_dma_sync_sg_for_device -#define dma_mapping_error platform_dma_mapping_error -#else int dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, enum dma_data_direction direction); void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, @@ -67,7 +50,6 @@ dma_sync_sg_for_device(struct device *de swiotlb_sync_sg_for_device(dev,sg,nelems,direction); flush_write_buffers(); } -#endif #define dma_map_page(dev, pg, off, size, dir) \ dma_map_single(dev, page_address(pg) + (off), (size), (dir)) @@ -109,7 +91,6 @@ dma_cache_sync (void *vaddr, size_t size #define dma_is_consistent(dma_handle) (1) /* all we do is coherent memory... */ -#ifdef CONFIG_XEN_IA64_DOM0_VP /* arch/i386/kernel/swiotlb.o requires */ void contiguous_bitmap_init(unsigned long end_pfn); @@ -130,8 +111,5 @@ range_straddles_page_boundary(void *p, s return (((((unsigned long)p & ~PAGE_MASK) + size) > PAGE_SIZE) && !test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap)); } -#else -#define contiguous_bitmap_init(end_pfn) ((void)end_pfn) -#endif #endif /* _ASM_IA64_DMA_MAPPING_H */ diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/include/asm-ia64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Sun Aug 13 12:00:38 2006 -0400 @@ -195,14 +195,6 @@ HYPERVISOR_multicall( return _hypercall2(int, multicall, call_list, nr_calls); } -#ifndef CONFIG_XEN_IA64_DOM0_VP -static inline int -HYPERVISOR_memory_op( - unsigned int cmd, void *arg) -{ - return _hypercall2(int, memory_op, cmd, arg); -} -#else //XXX xen/ia64 copy_from_guest() is broken. // This is a temporal work around until it is fixed. 
static inline int @@ -230,7 +222,6 @@ HYPERVISOR_memory_op( } /* NOTREACHED */ } -#endif static inline int HYPERVISOR_event_channel_op( @@ -288,12 +279,8 @@ ____HYPERVISOR_grant_table_op( { return _hypercall3(int, grant_table_op, cmd, uop, count); } -#ifndef CONFIG_XEN_IA64_DOM0_VP -#define HYPERVISOR_grant_table_op(cmd, uop, count) \ - ____HYPERVISOR_grant_table_op((cmd), (uop), (count)) -#else + int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count); -#endif static inline int HYPERVISOR_vcpu_op( @@ -319,7 +306,6 @@ static inline void exit_idle(void) {} irq_exit(); \ }) -#ifdef CONFIG_XEN_IA64_DOM0_VP #include <linux/err.h> #include <asm/xen/privop.h> @@ -507,11 +493,4 @@ HYPERVISOR_add_physmap(unsigned long gpf // for balloon driver #define HYPERVISOR_update_va_mapping(va, new_val, flags) (0) -#else -#define HYPERVISOR_ioremap(ioaddr, size) (ioaddr) -#define HYPERVISOR_phystomach(gpfn) (gpfn) -#define HYPERVISOR_machtophys(mfn) (mfn) -#define HYPERVISOR_zap_physmap(gpfn, extent_order) (0) -#define HYPERVISOR_add_physmap(gpfn, mfn, flags) (0) -#endif #endif /* __HYPERCALL_H__ */ diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Sun Aug 13 12:00:38 2006 -0400 @@ -125,11 +125,6 @@ HYPERVISOR_poll( // for drivers/xen/privcmd/privcmd.c #define machine_to_phys_mapping 0 -#ifndef CONFIG_XEN_IA64_DOM0_VP -#define direct_remap_pfn_range(a,b,c,d,e,f) remap_pfn_range(a,b,c,d,e) -#define pfn_to_mfn(x) (x) -#define mfn_to_pfn(x) (x) -#else struct vm_area_struct; int direct_remap_pfn_range(struct vm_area_struct *vma, unsigned long address, @@ -140,7 +135,6 @@ struct file; struct file; int privcmd_mmap(struct file * file, struct vm_area_struct * vma); #define HAVE_ARCH_PRIVCMD_MMAP -#endif // for drivers/xen/balloon/balloon.c #ifdef CONFIG_XEN_SCRUB_PAGES @@ -152,12 +146,7 @@ int privcmd_mmap(struct file * file, str #define __pte_ma(_x) ((pte_t) {(_x)}) #define phys_to_machine_mapping_valid(_x) (1) #define pfn_pte_ma(_x,_y) __pte_ma(0) -#ifndef CONFIG_XEN_IA64_DOM0_VP //XXX -#define set_phys_to_machine(_x,_y) do {} while (0) -#define xen_machphys_update(_x,_y) do {} while (0) -#endif -#ifdef CONFIG_XEN_IA64_DOM0_VP int __xen_create_contiguous_region(unsigned long vstart, unsigned int order, unsigned int address_bits); static inline int xen_create_contiguous_region(unsigned long vstart, @@ -191,11 +180,6 @@ MULTI_update_va_mapping( mcl->result = 0; } -#else -#define xen_create_contiguous_region(vstart, order, address_bits) (0) -#define xen_destroy_contiguous_region(vstart, order) do {} while (0) -#endif - // for debug asmlinkage int xprintk(const char *fmt, ...); #define xprintd(fmt, ...) xprintk("%s:%d " fmt, __func__, __LINE__, \ diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/include/asm-ia64/io.h --- a/linux-2.6-xen-sparse/include/asm-ia64/io.h Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/io.h Sun Aug 13 12:00:38 2006 -0400 @@ -97,13 +97,6 @@ extern int valid_mmap_phys_addr_range (u * The following two macros are deprecated and scheduled for removal. * Please use the PCI-DMA interface defined in <asm/pci.h> instead. 
*/ -#ifndef CONFIG_XEN_IA64_DOM0_VP -#define bus_to_virt phys_to_virt -#define virt_to_bus virt_to_phys -#define page_to_bus page_to_phys -#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) -#define page_to_pseudophys(page) page_to_phys(page) -#else #define bus_to_virt(bus) \ phys_to_virt(machine_to_phys_for_dma(bus)) #define virt_to_bus(virt) \ @@ -131,7 +124,6 @@ extern int valid_mmap_phys_addr_range (u (((bvec_to_bus((vec1)) + (vec1)->bv_len) == bvec_to_bus((vec2))) && \ ((bvec_to_pseudophys((vec1)) + (vec1)->bv_len) == \ bvec_to_pseudophys((vec2)))) -#endif # endif /* KERNEL */ diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/include/asm-ia64/machvec.h --- a/linux-2.6-xen-sparse/include/asm-ia64/machvec.h Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/machvec.h Sun Aug 13 12:00:38 2006 -0400 @@ -247,7 +247,6 @@ extern void machvec_init (const char *na # error Unknown configuration. Update asm-ia64/machvec.h. # endif /* CONFIG_IA64_GENERIC */ -#ifdef CONFIG_XEN_IA64_DOM0_VP # define platform_dma_map_sg dma_map_sg # define platform_dma_unmap_sg dma_unmap_sg # define platform_dma_mapping_error dma_mapping_error @@ -260,7 +259,6 @@ extern void machvec_init (const char *na dma_sync_single_for_cpu # define platform_dma_sync_single_for_device \ dma_sync_single_for_device -#endif /* * Declare default routines which aren't declared anywhere else: diff -r bb510c274af8 -r 5f92043a3ab1 linux-2.6-xen-sparse/include/asm-ia64/page.h --- a/linux-2.6-xen-sparse/include/asm-ia64/page.h Fri Aug 11 13:30:48 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/page.h Sun Aug 13 12:00:38 2006 -0400 @@ -223,15 +223,6 @@ get_order (unsigned long size) #define INVALID_P2M_ENTRY (~0UL) -#ifndef CONFIG_XEN_IA64_DOM0_VP - -#define virt_to_machine(v) __pa(v) -#define machine_to_virt(m) __va(m) -#define virt_to_mfn(v) ((__pa(v)) >> PAGE_SHIFT) -#define mfn_to_virt(m) (__va((m) << PAGE_SHIFT)) - -#else - #include <linux/kernel.h> #include <asm/hypervisor.h> #include <xen/features.h> // to compile netback, netfront @@ -337,7 +328,6 @@ mfn_to_local_pfn(unsigned long mfn) return pfn; } -#endif /* CONFIG_XEN_IA64_DOM0_VP */ #endif /* CONFIG_XEN */ #endif /* __ASSEMBLY__ */ diff -r bb510c274af8 -r 5f92043a3ab1 tools/examples/xend-config.sxp --- a/tools/examples/xend-config.sxp Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/examples/xend-config.sxp Sun Aug 13 12:00:38 2006 -0400 @@ -54,7 +54,7 @@ # (xend-relocation-hosts-allow '^localhost$ ^.*\.example\.org$') # #(xend-relocation-hosts-allow '') -(xend-relocation-hosts-allow '^localhost$') +(xend-relocation-hosts-allow '^localhost$ ^localhost\\.localdomain$') # The limit (in kilobytes) on the size of the console buffer #(console-limit 1024) diff -r bb510c274af8 -r 5f92043a3ab1 tools/examples/xmexample.hvm --- a/tools/examples/xmexample.hvm Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/examples/xmexample.hvm Sun Aug 13 12:00:38 2006 -0400 @@ -130,6 +130,10 @@ vnc=1 #vncdisplay=1 #---------------------------------------------------------------------------- +# try to find an unused port for the VNC server, default = 1 +#vncunused=1 + +#---------------------------------------------------------------------------- # enable spawning vncviewer for domain's console # (only valid when vnc=1), default = 0 #vncconsole=0 diff -r bb510c274af8 -r 5f92043a3ab1 tools/firmware/Makefile --- a/tools/firmware/Makefile Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/firmware/Makefile Sun Aug 13 12:00:38 2006 -0400 @@ -30,7 +30,7 @@ all: .PHONY: install 
install: all [ -d $(INSTALL_DIR) ] || install -d -m0755 $(INSTALL_DIR) - install -m0644 $(TARGET) $(INSTALL_DIR) + [ ! -e $(TARGET) ] || install -m0644 $(TARGET) $(INSTALL_DIR) .PHONY: clean clean: diff -r bb510c274af8 -r 5f92043a3ab1 tools/firmware/acpi/acpi_fadt.h --- a/tools/firmware/acpi/acpi_fadt.h Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/firmware/acpi/acpi_fadt.h Sun Aug 13 12:00:38 2006 -0400 @@ -59,8 +59,7 @@ #define ACPI_PM1A_EVT_BLK_ADDRESS_SPACE_ID ACPI_SYSTEM_IO #define ACPI_PM1A_EVT_BLK_BIT_WIDTH 0x20 #define ACPI_PM1A_EVT_BLK_BIT_OFFSET 0x00 -//#define ACPI_PM1A_EVT_BLK_ADDRESS 0x000000000000c010 -#define ACPI_PM1A_EVT_BLK_ADDRESS 0x000000000000c040 +#define ACPI_PM1A_EVT_BLK_ADDRESS 0x000000000000c010 // // PM1B Event Register Block Generic Address Information diff -r bb510c274af8 -r 5f92043a3ab1 tools/firmware/hvmloader/hvmloader.c --- a/tools/firmware/hvmloader/hvmloader.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/firmware/hvmloader/hvmloader.c Sun Aug 13 12:00:38 2006 -0400 @@ -26,7 +26,7 @@ #include "hypercall.h" #include "util.h" #include <xen/version.h> -#include <xen/hvm/hvm_info_table.h> +#include <xen/hvm/params.h> /* memory map */ #define HYPERCALL_PHYSICAL_ADDRESS 0x00080000 @@ -172,7 +172,7 @@ int int main(void) { - struct hvm_info_table *t = get_hvm_info_table(); + struct xen_hvm_param hvm_param; puts("HVM Loader\n"); @@ -180,7 +180,10 @@ main(void) puts("Loading ROMBIOS ...\n"); memcpy((void *)ROMBIOS_PHYSICAL_ADDRESS, rombios, sizeof(rombios)); - if (t->apic_enabled) + + hvm_param.domid = DOMID_SELF; + hvm_param.index = HVM_PARAM_APIC_ENABLED; + if (!hypercall_hvm_op(HVMOP_get_param, &hvm_param) && hvm_param.value) create_mp_tables(); if (cirrus_check()) { diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/hw/pc.c --- a/tools/ioemu/hw/pc.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/hw/pc.c Sun Aug 13 12:00:38 2006 -0400 @@ -572,9 +572,6 @@ static int parallel_io[MAX_PARALLEL_PORT static int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc }; static int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 }; -/* PIIX4 acpi pci configuration space, func 3 */ -extern void pci_piix4_acpi_init(PCIBus *bus, int devfn); - #ifdef HAS_AUDIO static void audio_init (PCIBus *pci_bus) { @@ -879,15 +876,17 @@ static void pc_init1(uint64_t ram_size, /* using PIIX4 acpi model */ if (pci_enabled && acpi_enabled) - pci_piix4_acpi_init(pci_bus, piix3_devfn + 3); + pci_piix4_acpi_init(pci_bus, piix3_devfn + 2); if (pci_enabled && usb_enabled) { - usb_uhci_init(pci_bus, piix3_devfn + 2); - } - - if (pci_enabled && acpi_enabled && 0) { + usb_uhci_init(pci_bus, piix3_devfn + (acpi_enabled ? 3 : 2)); + } + +#ifndef CONFIG_DM + if (pci_enabled && acpi_enabled) { piix4_pm_init(pci_bus, piix3_devfn + 3); } +#endif /* !CONFIG_DM */ #if 0 /* ??? 
Need to figure out some way for the user to @@ -910,8 +909,10 @@ static void pc_init1(uint64_t ram_size, /* XXX: should be done in the Bochs BIOS */ if (pci_enabled) { pci_bios_init(); +#ifndef CONFIG_DM if (acpi_enabled) acpi_bios_init(); +#endif /* !CONFIG_DM */ } } diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/hw/piix4acpi.c --- a/tools/ioemu/hw/piix4acpi.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/hw/piix4acpi.c Sun Aug 13 12:00:38 2006 -0400 @@ -374,13 +374,13 @@ static void acpi_map(PCIDevice *pci_dev, register_ioport_read(addr + 8, 4, 4, acpiPm1Timer_readl, d); } -/* PIIX4 acpi pci configuration space, func 3 */ +/* PIIX4 acpi pci configuration space, func 2 */ void pci_piix4_acpi_init(PCIBus *bus, int devfn) { PCIAcpiState *d; uint8_t *pci_conf; - /* register a function 3 of PIIX4 */ + /* register a function 2 of PIIX4 */ d = (PCIAcpiState *)pci_register_device( bus, "PIIX4 ACPI", sizeof(PCIAcpiState), devfn, NULL, NULL); diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/patches/acpi-support --- a/tools/ioemu/patches/acpi-support Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/patches/acpi-support Sun Aug 13 12:00:38 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/Makefile.target Index: ioemu/Makefile.target =================================================================== ---- ioemu.orig/Makefile.target 2006-08-06 02:23:23.000000000 +0100 -+++ ioemu/Makefile.target 2006-08-07 17:38:47.698306442 +0100 +--- ioemu.orig/Makefile.target 2006-08-09 19:54:26.055548240 +0100 ++++ ioemu/Makefile.target 2006-08-09 21:29:37.834611244 +0100 @@ -357,6 +357,7 @@ VL_OBJS+= fdc.o mc146818rtc.o serial.o pc.o VL_OBJS+= cirrus_vga.o mixeng.o parallel.o acpi.o piix_pci.o @@ -12,39 +12,44 @@ Index: ioemu/Makefile.target ifeq ($(TARGET_BASE_ARCH), ppc) Index: ioemu/hw/pc.c =================================================================== ---- ioemu.orig/hw/pc.c 2006-08-06 02:23:45.000000000 +0100 -+++ ioemu/hw/pc.c 2006-08-07 17:42:00.939426374 +0100 -@@ -572,6 +572,9 @@ - static int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc }; - static int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 }; - -+/* PIIX4 acpi pci configuration space, func 3 */ -+extern void pci_piix4_acpi_init(PCIBus *bus, int devfn); -+ - #ifdef HAS_AUDIO - static void audio_init (PCIBus *pci_bus) - { -@@ -874,11 +877,15 @@ +--- ioemu.orig/hw/pc.c 2006-08-09 19:54:26.133539447 +0100 ++++ ioemu/hw/pc.c 2006-08-09 21:30:30.188733212 +0100 +@@ -874,13 +874,19 @@ cmos_init(ram_size, boot_device, bs_table, timeoffset); + /* using PIIX4 acpi model */ + if (pci_enabled && acpi_enabled) -+ pci_piix4_acpi_init(pci_bus, piix3_devfn + 3); ++ pci_piix4_acpi_init(pci_bus, piix3_devfn + 2); + if (pci_enabled && usb_enabled) { - usb_uhci_init(pci_bus, piix3_devfn + 2); +- usb_uhci_init(pci_bus, piix3_devfn + 2); ++ usb_uhci_init(pci_bus, piix3_devfn + (acpi_enabled ? 3 : 2)); } -- if (pci_enabled && acpi_enabled) { -+ if (pci_enabled && acpi_enabled && 0) { ++#ifndef CONFIG_DM + if (pci_enabled && acpi_enabled) { piix4_pm_init(pci_bus, piix3_devfn + 3); } ++#endif /* !CONFIG_DM */ + + #if 0 + /* ??? 
Need to figure out some way for the user to +@@ -903,8 +909,10 @@ + /* XXX: should be done in the Bochs BIOS */ + if (pci_enabled) { + pci_bios_init(); ++#ifndef CONFIG_DM + if (acpi_enabled) + acpi_bios_init(); ++#endif /* !CONFIG_DM */ + } + } Index: ioemu/hw/piix4acpi.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/hw/piix4acpi.c 2006-08-07 17:41:41.932577728 +0100 ++++ ioemu/hw/piix4acpi.c 2006-08-09 21:32:04.400129788 +0100 @@ -0,0 +1,388 @@ +/* + * PIIX4 ACPI controller emulation @@ -405,13 +410,13 @@ Index: ioemu/hw/piix4acpi.c +} + + -+/* PIIX4 acpi pci configuration space, func 3 */ ++/* PIIX4 acpi pci configuration space, func 2 */ +void pci_piix4_acpi_init(PCIBus *bus, int devfn) +{ + PCIAcpiState *d; + uint8_t *pci_conf; + -+ /* register a function 3 of PIIX4 */ ++ /* register a function 2 of PIIX4 */ + d = (PCIAcpiState *)pci_register_device( + bus, "PIIX4 ACPI", sizeof(PCIAcpiState), + devfn, NULL, NULL); @@ -436,8 +441,8 @@ Index: ioemu/hw/piix4acpi.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-06 02:23:45.000000000 +0100 -+++ ioemu/vl.c 2006-08-07 17:41:40.613727012 +0100 +--- ioemu.orig/vl.c 2006-08-09 19:54:26.135539222 +0100 ++++ ioemu/vl.c 2006-08-09 21:29:38.067585110 +0100 @@ -156,7 +156,7 @@ #else #define MAX_CPUS 1 @@ -483,8 +488,8 @@ Index: ioemu/vl.c } Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-06 02:23:45.000000000 +0100 -+++ ioemu/vl.h 2006-08-07 17:38:47.847289567 +0100 +--- ioemu.orig/vl.h 2006-08-09 19:54:26.136539109 +0100 ++++ ioemu/vl.h 2006-08-09 21:31:21.772931536 +0100 @@ -167,6 +167,7 @@ extern int kqemu_allowed; extern int win2k_install_hack; @@ -493,10 +498,20 @@ Index: ioemu/vl.h extern int smp_cpus; /* XXX: make it dynamic */ +@@ -922,6 +923,9 @@ + void piix4_pm_init(PCIBus *bus, int devfn); + void acpi_bios_init(void); + ++/* piix4acpi.c */ ++extern void pci_piix4_acpi_init(PCIBus *bus, int devfn); ++ + /* pc.c */ + extern QEMUMachine pc_machine; + extern QEMUMachine isapc_machine; Index: ioemu/hw/piix_pci.c =================================================================== ---- ioemu.orig/hw/piix_pci.c 2006-08-06 02:29:41.000000000 +0100 -+++ ioemu/hw/piix_pci.c 2006-08-07 17:38:57.480198468 +0100 +--- ioemu.orig/hw/piix_pci.c 2006-08-09 19:54:19.636318228 +0100 ++++ ioemu/hw/piix_pci.c 2006-08-09 19:54:26.152537305 +0100 @@ -241,7 +241,7 @@ static uint32_t pci_bios_io_addr; static uint32_t pci_bios_mem_addr; diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/patches/acpi-timer-support --- a/tools/ioemu/patches/acpi-timer-support Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/patches/acpi-timer-support Sun Aug 13 12:00:38 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/hw/piix4acpi.c Index: ioemu/hw/piix4acpi.c =================================================================== ---- ioemu.orig/hw/piix4acpi.c 2006-08-06 02:24:54.262068457 +0100 -+++ ioemu/hw/piix4acpi.c 2006-08-06 02:30:29.288761563 +0100 +--- ioemu.orig/hw/piix4acpi.c 2006-08-09 20:00:56.118008198 +0100 ++++ ioemu/hw/piix4acpi.c 2006-08-09 20:04:54.375299065 +0100 @@ -24,31 +24,30 @@ */ @@ -185,7 +185,7 @@ Index: ioemu/hw/piix4acpi.c - /* PIIX4 acpi pci configuration space, func 3 */ - void pci_piix4_acpi_init(PCIBus *bus) + void pci_piix4_acpi_init(PCIBus *bus, int devfn) @@ -384,5 +383,5 @@ pci_register_io_region((PCIDevice *)d, 4, 0x10, PCI_ADDRESS_SPACE_IO, 
acpi_map); diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/patches/domain-destroy --- a/tools/ioemu/patches/domain-destroy Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/patches/domain-destroy Sun Aug 13 12:00:38 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/monitor.c Index: ioemu/monitor.c =================================================================== ---- ioemu.orig/monitor.c 2006-08-06 02:22:01.487319736 +0100 -+++ ioemu/monitor.c 2006-08-06 02:23:02.269544103 +0100 +--- ioemu.orig/monitor.c 2006-08-08 11:27:48.555190337 +0100 ++++ ioemu/monitor.c 2006-08-08 11:27:53.984584612 +0100 @@ -308,6 +308,7 @@ static void do_quit(void) @@ -12,10 +12,10 @@ Index: ioemu/monitor.c Index: ioemu/target-i386-dm/helper2.c =================================================================== ---- ioemu.orig/target-i386-dm/helper2.c 2006-08-06 02:22:59.251880493 +0100 -+++ ioemu/target-i386-dm/helper2.c 2006-08-06 02:23:02.270543991 +0100 -@@ -483,5 +483,25 @@ - shared_page->vcpu_iodata[send_vcpu].dm_eport); +--- ioemu.orig/target-i386-dm/helper2.c 2006-08-08 11:27:53.063687351 +0100 ++++ ioemu/target-i386-dm/helper2.c 2006-08-08 11:27:54.011581601 +0100 +@@ -488,5 +488,25 @@ + xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]); } } + destroy_hvm_domain(); @@ -42,8 +42,8 @@ Index: ioemu/target-i386-dm/helper2.c +} Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-06 02:22:59.255880047 +0100 -+++ ioemu/vl.h 2006-08-06 02:23:02.271543880 +0100 +--- ioemu.orig/vl.h 2006-08-08 11:27:53.067686905 +0100 ++++ ioemu/vl.h 2006-08-08 11:27:54.061576023 +0100 @@ -1189,4 +1189,7 @@ void kqemu_record_dump(void); diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/patches/domain-reset --- a/tools/ioemu/patches/domain-reset Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/patches/domain-reset Sun Aug 13 12:00:38 2006 -0400 @@ -1,8 +1,8 @@ Index: ioemu/target-i386-dm/helper2.c Index: ioemu/target-i386-dm/helper2.c =================================================================== ---- ioemu.orig/target-i386-dm/helper2.c 2006-08-06 02:21:15.779415007 +0100 -+++ ioemu/target-i386-dm/helper2.c 2006-08-06 02:22:59.251880493 +0100 -@@ -123,6 +123,25 @@ +--- ioemu.orig/target-i386-dm/helper2.c 2006-08-08 11:27:45.566523765 +0100 ++++ ioemu/target-i386-dm/helper2.c 2006-08-08 11:27:53.063687351 +0100 +@@ -127,6 +127,25 @@ /* called from main_cpu_reset */ void cpu_reset(CPUX86State *env) { @@ -28,7 +28,7 @@ Index: ioemu/target-i386-dm/helper2.c } void cpu_x86_close(CPUX86State *env) -@@ -449,6 +468,10 @@ +@@ -455,6 +474,10 @@ if (vm_running) { if (shutdown_requested) break; @@ -41,8 +41,8 @@ Index: ioemu/target-i386-dm/helper2.c /* Wait up to 10 msec. 
*/ Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-06 02:22:56.618174081 +0100 -+++ ioemu/vl.c 2006-08-06 02:22:59.254880158 +0100 +--- ioemu.orig/vl.c 2006-08-08 11:27:52.994695048 +0100 ++++ ioemu/vl.c 2006-08-08 11:27:53.066687017 +0100 @@ -4948,7 +4948,7 @@ } QEMUResetEntry; @@ -54,8 +54,8 @@ Index: ioemu/vl.c Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-06 02:22:01.501318175 +0100 -+++ ioemu/vl.h 2006-08-06 02:22:59.255880047 +0100 +--- ioemu.orig/vl.h 2006-08-08 11:27:48.757167803 +0100 ++++ ioemu/vl.h 2006-08-08 11:27:53.067686905 +0100 @@ -130,6 +130,7 @@ void qemu_register_reset(QEMUResetHandler *func, void *opaque); diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/patches/domain-timeoffset --- a/tools/ioemu/patches/domain-timeoffset Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/patches/domain-timeoffset Sun Aug 13 12:00:38 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/hw/mc146818rtc.c Index: ioemu/hw/mc146818rtc.c =================================================================== ---- ioemu.orig/hw/mc146818rtc.c 2006-08-07 17:44:43.593604340 +0100 -+++ ioemu/hw/mc146818rtc.c 2006-08-07 17:44:47.594168708 +0100 +--- ioemu.orig/hw/mc146818rtc.c 2006-08-09 21:32:18.709516404 +0100 ++++ ioemu/hw/mc146818rtc.c 2006-08-09 21:32:24.723838065 +0100 @@ -178,10 +178,27 @@ } } @@ -46,8 +46,8 @@ Index: ioemu/hw/mc146818rtc.c static void rtc_copy_date(RTCState *s) Index: ioemu/hw/pc.c =================================================================== ---- ioemu.orig/hw/pc.c 2006-08-07 17:44:47.324198106 +0100 -+++ ioemu/hw/pc.c 2006-08-07 17:44:54.830380715 +0100 +--- ioemu.orig/hw/pc.c 2006-08-09 21:32:24.449868968 +0100 ++++ ioemu/hw/pc.c 2006-08-09 21:32:24.724837952 +0100 @@ -159,7 +159,7 @@ } @@ -117,8 +117,8 @@ Index: ioemu/hw/pc.c QEMUMachine pc_machine = { Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-07 17:44:47.464182863 +0100 -+++ ioemu/vl.c 2006-08-07 17:44:54.830380715 +0100 +--- ioemu.orig/vl.c 2006-08-09 21:32:24.591852952 +0100 ++++ ioemu/vl.c 2006-08-09 21:32:24.727837614 +0100 @@ -163,6 +163,8 @@ int xc_handle; @@ -174,8 +174,8 @@ Index: ioemu/vl.c if (usb_enabled) { Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-07 17:44:47.329197562 +0100 -+++ ioemu/vl.h 2006-08-07 17:44:54.830380715 +0100 +--- ioemu.orig/vl.h 2006-08-09 21:32:24.454868404 +0100 ++++ ioemu/vl.h 2006-08-09 21:32:24.728837501 +0100 @@ -575,7 +575,7 @@ int boot_device, DisplayState *ds, const char **fd_filename, int snapshot, diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/patches/qemu-target-i386-dm --- a/tools/ioemu/patches/qemu-target-i386-dm Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/patches/qemu-target-i386-dm Sun Aug 13 12:00:38 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/Makefile.target Index: ioemu/Makefile.target =================================================================== ---- ioemu.orig/Makefile.target 2006-08-06 02:14:09.794902973 +0100 -+++ ioemu/Makefile.target 2006-08-06 02:21:42.270461924 +0100 +--- ioemu.orig/Makefile.target 2006-08-08 11:24:33.479955101 +0100 ++++ ioemu/Makefile.target 2006-08-08 11:24:39.008338255 +0100 @@ -62,6 +62,8 @@ QEMU_SYSTEM=qemu-fast endif @@ -32,8 +32,8 @@ Index: ioemu/Makefile.target DEFINES += -DHAS_AUDIO Index: ioemu/configure 
=================================================================== ---- ioemu.orig/configure 2006-08-06 02:14:09.795902861 +0100 -+++ ioemu/configure 2006-08-06 02:15:01.771108621 +0100 +--- ioemu.orig/configure 2006-08-08 11:24:33.480954990 +0100 ++++ ioemu/configure 2006-08-08 11:24:38.122437102 +0100 @@ -373,6 +373,8 @@ if [ "$user" = "yes" ] ; then target_list="i386-user arm-user armeb-user sparc-user ppc-user mips-user mipsel-user $target_list" @@ -45,8 +45,8 @@ Index: ioemu/configure fi Index: ioemu/monitor.c =================================================================== ---- ioemu.orig/monitor.c 2006-08-06 02:14:49.574468309 +0100 -+++ ioemu/monitor.c 2006-08-06 02:21:16.172371202 +0100 +--- ioemu.orig/monitor.c 2006-08-08 11:24:33.484954543 +0100 ++++ ioemu/monitor.c 2006-08-08 11:24:39.253310921 +0100 @@ -1262,6 +1262,10 @@ "", "show profiling information", }, { "capture", "", do_info_capture, @@ -60,8 +60,8 @@ Index: ioemu/monitor.c Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-06 02:14:09.802902081 +0100 -+++ ioemu/vl.c 2006-08-06 02:21:16.369349244 +0100 +--- ioemu.orig/vl.c 2006-08-08 11:24:33.486954320 +0100 ++++ ioemu/vl.c 2006-08-08 11:24:39.454288496 +0100 @@ -87,7 +87,7 @@ #include "exec-all.h" @@ -98,8 +98,8 @@ Index: ioemu/vl.c { Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-06 02:13:56.733359091 +0100 -+++ ioemu/vl.h 2006-08-06 02:21:16.369349244 +0100 +--- ioemu.orig/vl.h 2006-08-08 11:24:31.082222636 +0100 ++++ ioemu/vl.h 2006-08-08 11:24:39.454288496 +0100 @@ -37,6 +37,8 @@ #include <unistd.h> #include <fcntl.h> @@ -132,7 +132,7 @@ Index: ioemu/target-i386-dm/cpu.h Index: ioemu/target-i386-dm/cpu.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/cpu.h 2006-08-06 02:21:16.023387810 +0100 ++++ ioemu/target-i386-dm/cpu.h 2006-08-08 11:24:39.099328102 +0100 @@ -0,0 +1,86 @@ +/* + * i386 virtual CPU header @@ -223,7 +223,7 @@ Index: ioemu/target-i386-dm/exec-dm.c Index: ioemu/target-i386-dm/exec-dm.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/exec-dm.c 2006-08-06 02:21:16.024387698 +0100 ++++ ioemu/target-i386-dm/exec-dm.c 2006-08-08 11:24:39.099328102 +0100 @@ -0,0 +1,516 @@ +/* + * virtual page mapping and translated block handling @@ -744,8 +744,8 @@ Index: ioemu/target-i386-dm/helper2.c Index: ioemu/target-i386-dm/helper2.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/helper2.c 2006-08-06 02:21:15.779415007 +0100 -@@ -0,0 +1,464 @@ ++++ ioemu/target-i386-dm/helper2.c 2006-08-08 11:24:44.888682140 +0100 +@@ -0,0 +1,469 @@ +/* + * i386 helpers (without register variable usage) + * @@ -830,6 +830,10 @@ Index: ioemu/target-i386-dm/helper2.c +/* which vcpu we are serving */ +int send_vcpu = 0; + ++//the evtchn port for polling the notification, ++#define NR_CPUS 32 ++evtchn_port_t ioreq_local_port[NR_CPUS]; ++ +CPUX86State *cpu_x86_init(void) +{ + CPUX86State *env; @@ -861,7 +865,7 @@ Index: ioemu/target-i386-dm/helper2.c + fprintf(logfile, "bind interdomain ioctl error %d\n", errno); + return NULL; + } -+ shared_page->vcpu_iodata[i].dm_eport = rc; ++ ioreq_local_port[i] = rc; + } + } + @@ -913,8 +917,7 @@ Index: 
ioemu/target-i386-dm/helper2.c + + for (i = 0; i < vcpus; i++) { + req = &(shared_page->vcpu_iodata[i].vp_ioreq); -+ term_printf("vcpu %d: event port %d\n", i, -+ shared_page->vcpu_iodata[i].vp_eport); ++ term_printf("vcpu %d: event port %d\n", i, ioreq_local_port[i]); + term_printf(" req state: %x, pvalid: %x, addr: %"PRIx64", " + "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n", + req->state, req->pdata_valid, req->addr, @@ -933,6 +936,7 @@ Index: ioemu/target-i386-dm/helper2.c + + if (req->state == STATE_IOREQ_READY) { + req->state = STATE_IOREQ_INPROCESS; ++ rmb(); + return req; + } + @@ -955,7 +959,7 @@ Index: ioemu/target-i386-dm/helper2.c + port = xc_evtchn_pending(xce_handle); + if (port != -1) { + for ( i = 0; i < vcpus; i++ ) -+ if ( shared_page->vcpu_iodata[i].dm_eport == port ) ++ if ( ioreq_local_port[i] == port ) + break; + + if ( i == vcpus ) { @@ -1176,8 +1180,10 @@ Index: ioemu/target-i386-dm/helper2.c + } + + /* No state change if state = STATE_IORESP_HOOK */ -+ if (req->state == STATE_IOREQ_INPROCESS) ++ if (req->state == STATE_IOREQ_INPROCESS) { ++ mb(); + req->state = STATE_IORESP_READY; ++ } + env->send_event = 1; + } +} @@ -1204,8 +1210,7 @@ Index: ioemu/target-i386-dm/helper2.c + + if (env->send_event) { + env->send_event = 0; -+ xc_evtchn_notify(xce_handle, -+ shared_page->vcpu_iodata[send_vcpu].dm_eport); ++ xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]); + } + } + return 0; @@ -1213,7 +1218,7 @@ Index: ioemu/target-i386-dm/i8259-dm.c Index: ioemu/target-i386-dm/i8259-dm.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/i8259-dm.c 2006-08-06 02:15:01.777107952 +0100 ++++ ioemu/target-i386-dm/i8259-dm.c 2006-08-08 11:24:33.505952200 +0100 @@ -0,0 +1,107 @@ +/* Xen 8259 stub for interrupt controller emulation + * @@ -1325,7 +1330,7 @@ Index: ioemu/target-i386-dm/qemu-dm.debu Index: ioemu/target-i386-dm/qemu-dm.debug =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/qemu-dm.debug 2006-08-06 02:15:01.778107841 +0100 ++++ ioemu/target-i386-dm/qemu-dm.debug 2006-08-08 11:24:33.505952200 +0100 @@ -0,0 +1,5 @@ +#!/bin/sh + @@ -1335,7 +1340,7 @@ Index: ioemu/target-i386-dm/qemu-ifup Index: ioemu/target-i386-dm/qemu-ifup =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/target-i386-dm/qemu-ifup 2006-08-06 02:15:01.778107841 +0100 ++++ ioemu/target-i386-dm/qemu-ifup 2006-08-08 11:24:33.505952200 +0100 @@ -0,0 +1,10 @@ +#!/bin/sh + diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/patches/series --- a/tools/ioemu/patches/series Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/patches/series Sun Aug 13 12:00:38 2006 -0400 @@ -34,8 +34,10 @@ vnc-start-vncviewer vnc-start-vncviewer vnc-title-domain-name vnc-access-monitor-vt +vnc-display-find-unused xenstore-block-device-config xenstore-write-vnc-port qemu-allow-disable-sdl qemu-fix-memset-args qemu-fix-write-to-disk-synchronous +xen-support-buffered-ioreqs diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/patches/xenstore-block-device-config --- a/tools/ioemu/patches/xenstore-block-device-config Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/patches/xenstore-block-device-config Sun Aug 13 12:00:38 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/Makefile.target Index: ioemu/Makefile.target =================================================================== 
---- ioemu.orig/Makefile.target 2006-08-07 17:42:27.802386071 +0100 -+++ ioemu/Makefile.target 2006-08-07 17:42:28.683289358 +0100 +--- ioemu.orig/Makefile.target 2006-08-09 21:32:24.915816410 +0100 ++++ ioemu/Makefile.target 2006-08-09 21:32:25.500750429 +0100 @@ -358,6 +358,7 @@ VL_OBJS+= cirrus_vga.o mixeng.o parallel.o acpi.o piix_pci.o VL_OBJS+= usb-uhci.o @@ -13,7 +13,7 @@ Index: ioemu/xenstore.c Index: ioemu/xenstore.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/xenstore.c 2006-08-07 17:42:28.684289249 +0100 ++++ ioemu/xenstore.c 2006-08-09 21:32:25.501750317 +0100 @@ -0,0 +1,187 @@ +/* + * This file is subject to the terms and conditions of the GNU General @@ -204,9 +204,9 @@ Index: ioemu/xenstore.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-07 17:42:28.393320909 +0100 -+++ ioemu/vl.c 2006-08-07 17:42:28.687288922 +0100 -@@ -5242,9 +5242,11 @@ +--- ioemu.orig/vl.c 2006-08-09 21:32:25.438757422 +0100 ++++ ioemu/vl.c 2006-08-09 21:32:25.504749978 +0100 +@@ -5243,9 +5243,11 @@ "Standard options:\n" "-M machine select emulated machine (-M ? for list)\n" "-fda/-fdb file use 'file' as floppy disk 0/1 image\n" @@ -218,7 +218,7 @@ Index: ioemu/vl.c "-boot [a|c|d] boot on floppy (a), hard disk (c) or CD-ROM (d)\n" "-snapshot write to temporary files instead of disk image files\n" #ifdef TARGET_I386 -@@ -5370,11 +5372,13 @@ +@@ -5372,11 +5374,13 @@ QEMU_OPTION_M, QEMU_OPTION_fda, QEMU_OPTION_fdb, @@ -232,7 +232,7 @@ Index: ioemu/vl.c QEMU_OPTION_boot, QEMU_OPTION_snapshot, #ifdef TARGET_I386 -@@ -5445,11 +5449,13 @@ +@@ -5448,11 +5452,13 @@ { "M", HAS_ARG, QEMU_OPTION_M }, { "fda", HAS_ARG, QEMU_OPTION_fda }, { "fdb", HAS_ARG, QEMU_OPTION_fdb }, @@ -246,7 +246,7 @@ Index: ioemu/vl.c { "boot", HAS_ARG, QEMU_OPTION_boot }, { "snapshot", 0, QEMU_OPTION_snapshot }, #ifdef TARGET_I386 -@@ -5797,10 +5803,16 @@ +@@ -5801,10 +5807,16 @@ #ifdef CONFIG_GDBSTUB int use_gdbstub, gdbstub_port; #endif @@ -265,7 +265,7 @@ Index: ioemu/vl.c const char *kernel_filename, *kernel_cmdline; DisplayState *ds = &display_state; int cyls, heads, secs, translation; -@@ -5861,8 +5873,10 @@ +@@ -5865,8 +5877,10 @@ initrd_filename = NULL; for(i = 0; i < MAX_FD; i++) fd_filename[i] = NULL; @@ -276,8 +276,8 @@ Index: ioemu/vl.c ram_size = DEFAULT_RAM_SIZE * 1024 * 1024; vga_ram_size = VGA_RAM_SIZE; bios_size = BIOS_SIZE; -@@ -5875,11 +5889,13 @@ - vncviewer = 0; +@@ -5880,11 +5894,13 @@ + vncunused = 0; kernel_filename = NULL; kernel_cmdline = ""; +#ifndef CONFIG_DM @@ -290,7 +290,7 @@ Index: ioemu/vl.c cyls = heads = secs = 0; translation = BIOS_ATA_TRANSLATION_AUTO; pstrcpy(monitor_device, sizeof(monitor_device), "vc"); -@@ -5912,7 +5928,11 @@ +@@ -5917,7 +5933,11 @@ break; r = argv[optind]; if (r[0] != '-') { @@ -302,7 +302,7 @@ Index: ioemu/vl.c } else { const QEMUOption *popt; -@@ -5956,6 +5976,7 @@ +@@ -5961,6 +5981,7 @@ case QEMU_OPTION_initrd: initrd_filename = optarg; break; @@ -310,7 +310,7 @@ Index: ioemu/vl.c case QEMU_OPTION_hda: case QEMU_OPTION_hdb: case QEMU_OPTION_hdc: -@@ -5968,6 +5989,7 @@ +@@ -5973,6 +5994,7 @@ cdrom_index = -1; } break; @@ -318,7 +318,7 @@ Index: ioemu/vl.c case QEMU_OPTION_snapshot: snapshot = 1; break; -@@ -6020,11 +6042,13 @@ +@@ -6025,11 +6047,13 @@ case QEMU_OPTION_append: kernel_cmdline = optarg; break; @@ -332,7 +332,7 @@ Index: ioemu/vl.c case QEMU_OPTION_boot: boot_device = optarg[0]; if (boot_device != 'a' && -@@ 
-6274,12 +6298,18 @@ +@@ -6284,12 +6308,18 @@ } } @@ -351,7 +351,7 @@ Index: ioemu/vl.c if (!linux_boot && hd_filename[0] == '\0' && (cdrom_index >= 0 && hd_filename[cdrom_index] == '\0') && -@@ -6293,6 +6323,7 @@ +@@ -6303,6 +6333,7 @@ else boot_device = 'd'; } @@ -359,7 +359,7 @@ Index: ioemu/vl.c setvbuf(stdout, NULL, _IOLBF, 0); -@@ -6407,6 +6438,7 @@ +@@ -6417,6 +6448,7 @@ #endif /* !CONFIG_DM */ @@ -367,7 +367,7 @@ Index: ioemu/vl.c /* we always create the cdrom drive, even if no disk is there */ bdrv_init(); if (cdrom_index >= 0) { -@@ -6433,6 +6465,7 @@ +@@ -6443,6 +6475,7 @@ } } } @@ -375,7 +375,7 @@ Index: ioemu/vl.c /* we always create at least one floppy disk */ fd_table[0] = bdrv_new("fda"); -@@ -6511,6 +6544,8 @@ +@@ -6521,6 +6554,8 @@ } } @@ -386,8 +386,8 @@ Index: ioemu/vl.c kernel_filename, kernel_cmdline, initrd_filename, Index: ioemu/monitor.c =================================================================== ---- ioemu.orig/monitor.c 2006-08-07 17:42:27.132461888 +0100 -+++ ioemu/monitor.c 2006-08-07 17:42:28.688288814 +0100 +--- ioemu.orig/monitor.c 2006-08-09 21:32:24.238892765 +0100 ++++ ioemu/monitor.c 2006-08-09 21:32:25.505749865 +0100 @@ -24,6 +24,7 @@ #include "vl.h" #include "disas.h" @@ -416,8 +416,8 @@ Index: ioemu/monitor.c int i; Index: ioemu/block.c =================================================================== ---- ioemu.orig/block.c 2006-08-07 17:42:21.704076241 +0100 -+++ ioemu/block.c 2006-08-07 17:42:28.689288705 +0100 +--- ioemu.orig/block.c 2006-08-09 21:32:18.339558126 +0100 ++++ ioemu/block.c 2006-08-09 21:32:25.506749753 +0100 @@ -758,6 +758,7 @@ static void raw_close(BlockDriverState *bs) { @@ -428,9 +428,9 @@ Index: ioemu/block.c Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-07 17:42:28.394320800 +0100 -+++ ioemu/vl.h 2006-08-07 17:42:28.689288705 +0100 -@@ -1184,6 +1184,8 @@ +--- ioemu.orig/vl.h 2006-08-09 21:32:25.439757309 +0100 ++++ ioemu/vl.h 2006-08-09 21:32:25.506749753 +0100 +@@ -1187,6 +1187,8 @@ void term_print_help(void); void monitor_readline(const char *prompt, int is_password, char *buf, int buf_size); @@ -439,7 +439,7 @@ Index: ioemu/vl.h /* readline.c */ typedef void ReadLineFunc(void *opaque, const char *str); -@@ -1196,6 +1198,13 @@ +@@ -1199,6 +1201,13 @@ void readline_start(const char *prompt, int is_password, ReadLineFunc *readline_func, void *opaque); @@ -455,8 +455,8 @@ Index: ioemu/vl.h extern char domain_name[]; Index: ioemu/hw/ide.c =================================================================== ---- ioemu.orig/hw/ide.c 2006-08-07 17:42:27.552414361 +0100 -+++ ioemu/hw/ide.c 2006-08-07 17:42:28.691288487 +0100 +--- ioemu.orig/hw/ide.c 2006-08-09 21:32:24.658845396 +0100 ++++ ioemu/hw/ide.c 2006-08-09 21:32:25.508749527 +0100 @@ -1158,6 +1158,7 @@ } else { ide_atapi_cmd_error(s, SENSE_NOT_READY, diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/patches/xenstore-write-vnc-port --- a/tools/ioemu/patches/xenstore-write-vnc-port Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/patches/xenstore-write-vnc-port Sun Aug 13 12:00:38 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/xenstore.c Index: ioemu/xenstore.c =================================================================== ---- ioemu.orig/xenstore.c 2006-08-07 17:42:28.684289249 +0100 -+++ ioemu/xenstore.c 2006-08-07 17:42:28.891266728 +0100 +--- ioemu.orig/xenstore.c 2006-08-09 21:32:25.501750317 +0100 ++++ ioemu/xenstore.c 2006-08-09 21:32:25.706727195 +0100 @@ -185,3 +185,31 @@ free(image); 
free(vec); @@ -36,10 +36,10 @@ Index: ioemu/xenstore.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-07 17:42:28.687288922 +0100 -+++ ioemu/vl.c 2006-08-07 17:42:28.894266401 +0100 -@@ -6501,6 +6501,7 @@ - vnc_display_init(ds, vnc_display); +--- ioemu.orig/vl.c 2006-08-09 21:32:25.504749978 +0100 ++++ ioemu/vl.c 2006-08-09 21:32:25.709726857 +0100 +@@ -6511,6 +6511,7 @@ + vnc_display = vnc_display_init(ds, vnc_display, vncunused); if (vncviewer) vnc_start_viewer(vnc_display); + xenstore_write_vncport(vnc_display); @@ -48,9 +48,9 @@ Index: ioemu/vl.c sdl_display_init(ds, full_screen); Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-07 17:42:28.689288705 +0100 -+++ ioemu/vl.h 2006-08-07 17:42:28.895266293 +0100 -@@ -1203,6 +1203,7 @@ +--- ioemu.orig/vl.h 2006-08-09 21:32:25.506749753 +0100 ++++ ioemu/vl.h 2006-08-09 21:32:25.710726744 +0100 +@@ -1206,6 +1206,7 @@ int xenstore_fd(void); void xenstore_process_event(void *opaque); void xenstore_check_new_media_present(int timeout); diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/target-i386-dm/helper2.c Sun Aug 13 12:00:38 2006 -0400 @@ -76,11 +76,19 @@ int xc_handle; shared_iopage_t *shared_page = NULL; +#define BUFFER_IO_MAX_DELAY 100 +buffered_iopage_t *buffered_io_page = NULL; +QEMUTimer *buffered_io_timer; + /* the evtchn fd for polling */ int xce_handle = -1; /* which vcpu we are serving */ int send_vcpu = 0; + +//the evtchn port for polling the notification, +#define NR_CPUS 32 +evtchn_port_t ioreq_local_port[NR_CPUS]; CPUX86State *cpu_x86_init(void) { @@ -113,7 +121,7 @@ CPUX86State *cpu_x86_init(void) fprintf(logfile, "bind interdomain ioctl error %d\n", errno); return NULL; } - shared_page->vcpu_iodata[i].dm_eport = rc; + ioreq_local_port[i] = rc; } } @@ -184,8 +192,7 @@ void sp_info() for (i = 0; i < vcpus; i++) { req = &(shared_page->vcpu_iodata[i].vp_ioreq); - term_printf("vcpu %d: event port %d\n", i, - shared_page->vcpu_iodata[i].vp_eport); + term_printf("vcpu %d: event port %d\n", i, ioreq_local_port[i]); term_printf(" req state: %x, pvalid: %x, addr: %"PRIx64", " "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n", req->state, req->pdata_valid, req->addr, @@ -204,6 +211,7 @@ static ioreq_t *__cpu_get_ioreq(int vcpu if (req->state == STATE_IOREQ_READY) { req->state = STATE_IOREQ_INPROCESS; + rmb(); return req; } @@ -226,7 +234,7 @@ static ioreq_t *cpu_get_ioreq(void) port = xc_evtchn_pending(xce_handle); if (port != -1) { for ( i = 0; i < vcpus; i++ ) - if ( shared_page->vcpu_iodata[i].dm_eport == port ) + if ( ioreq_local_port[i] == port ) break; if ( i == vcpus ) { @@ -415,40 +423,74 @@ void cpu_ioreq_xor(CPUState *env, ioreq_ req->u.data = tmp1; } +void __handle_ioreq(CPUState *env, ioreq_t *req) +{ + if (!req->pdata_valid && req->dir == IOREQ_WRITE && req->size != 4) + req->u.data &= (1UL << (8 * req->size)) - 1; + + switch (req->type) { + case IOREQ_TYPE_PIO: + cpu_ioreq_pio(env, req); + break; + case IOREQ_TYPE_COPY: + cpu_ioreq_move(env, req); + break; + case IOREQ_TYPE_AND: + cpu_ioreq_and(env, req); + break; + case IOREQ_TYPE_OR: + cpu_ioreq_or(env, req); + break; + case IOREQ_TYPE_XOR: + cpu_ioreq_xor(env, req); + break; + default: + hw_error("Invalid ioreq type 0x%x\n", req->type); + } +} + +void __handle_buffered_iopage(CPUState *env) +{ + ioreq_t *req = NULL; + 
+ if (!buffered_io_page) + return; + + while (buffered_io_page->read_pointer != + buffered_io_page->write_pointer) { + req = &buffered_io_page->ioreq[buffered_io_page->read_pointer % + IOREQ_BUFFER_SLOT_NUM]; + + __handle_ioreq(env, req); + + mb(); + buffered_io_page->read_pointer++; + } +} + +void handle_buffered_io(void *opaque) +{ + CPUState *env = opaque; + + __handle_buffered_iopage(env); + qemu_mod_timer(buffered_io_timer, BUFFER_IO_MAX_DELAY + + qemu_get_clock(rt_clock)); +} + void cpu_handle_ioreq(void *opaque) { CPUState *env = opaque; ioreq_t *req = cpu_get_ioreq(); + handle_buffered_io(env); if (req) { - if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) { - if (req->size != 4) - req->u.data &= (1UL << (8 * req->size))-1; - } - - switch (req->type) { - case IOREQ_TYPE_PIO: - cpu_ioreq_pio(env, req); - break; - case IOREQ_TYPE_COPY: - cpu_ioreq_move(env, req); - break; - case IOREQ_TYPE_AND: - cpu_ioreq_and(env, req); - break; - case IOREQ_TYPE_OR: - cpu_ioreq_or(env, req); - break; - case IOREQ_TYPE_XOR: - cpu_ioreq_xor(env, req); - break; - default: - hw_error("Invalid ioreq type 0x%x\n", req->type); - } + __handle_ioreq(env, req); /* No state change if state = STATE_IORESP_HOOK */ - if (req->state == STATE_IOREQ_INPROCESS) + if (req->state == STATE_IOREQ_INPROCESS) { + mb(); req->state = STATE_IORESP_READY; + } env->send_event = 1; } } @@ -459,6 +501,10 @@ int main_loop(void) extern int shutdown_requested; CPUState *env = cpu_single_env; int evtchn_fd = xc_evtchn_fd(xce_handle); + + buffered_io_timer = qemu_new_timer(rt_clock, handle_buffered_io, + cpu_single_env); + qemu_mod_timer(buffered_io_timer, qemu_get_clock(rt_clock)); qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, env); @@ -479,8 +525,7 @@ int main_loop(void) if (env->send_event) { env->send_event = 0; - xc_evtchn_notify(xce_handle, - shared_page->vcpu_iodata[send_vcpu].dm_eport); + xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]); } } destroy_hvm_domain(); diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/vl.c Sun Aug 13 12:00:38 2006 -0400 @@ -121,6 +121,7 @@ static DisplayState display_state; static DisplayState display_state; int nographic; int vncviewer; +int vncunused; const char* keyboard_layout = NULL; int64_t ticks_per_sec; int boot_device = 'c'; @@ -5344,6 +5345,7 @@ void help(void) "-loadvm file start right away with a saved state (loadvm in monitor)\n" "-vnc display start a VNC server on display\n" "-vncviewer start a vncviewer process for this domain\n" + "-vncunused bind the VNC server to an unused port\n" "-timeoffset time offset (in seconds) from local time\n" "-acpi disable or enable ACPI of HVM domain \n" "\n" @@ -5435,6 +5437,7 @@ enum { QEMU_OPTION_timeoffset, QEMU_OPTION_acpi, QEMU_OPTION_vncviewer, + QEMU_OPTION_vncunused, }; typedef struct QEMUOption { @@ -5512,6 +5515,7 @@ const QEMUOption qemu_options[] = { { "smp", HAS_ARG, QEMU_OPTION_smp }, { "vnc", HAS_ARG, QEMU_OPTION_vnc }, { "vncviewer", 0, QEMU_OPTION_vncviewer }, + { "vncunused", 0, QEMU_OPTION_vncunused }, /* temporary options */ { "usb", 0, QEMU_OPTION_usb }, @@ -5834,6 +5838,7 @@ int main(int argc, char **argv) unsigned long nr_pages; xen_pfn_t *page_array; extern void *shared_page; + extern void *buffered_io_page; char qemu_dm_logfilename[64]; @@ -5887,6 +5892,7 @@ int main(int argc, char **argv) snapshot = 0; nographic = 0; vncviewer = 0; + vncunused = 0; kernel_filename = NULL; kernel_cmdline = ""; #ifndef CONFIG_DM @@ -6294,6 
+6300,11 @@ int main(int argc, char **argv) case QEMU_OPTION_vncviewer: vncviewer++; break; + case QEMU_OPTION_vncunused: + vncunused++; + if (vnc_display == -1) + vnc_display = -2; + break; } } } @@ -6378,11 +6389,16 @@ int main(int argc, char **argv) phys_ram_base = xc_map_foreign_batch(xc_handle, domid, PROT_READ|PROT_WRITE, page_array, - nr_pages - 1); + nr_pages - 3); if (phys_ram_base == 0) { fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno); exit(-1); } + + /* not yet add for IA64 */ + buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, + PROT_READ|PROT_WRITE, + page_array[nr_pages - 3]); shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, PROT_READ|PROT_WRITE, @@ -6498,7 +6514,7 @@ int main(int argc, char **argv) if (nographic) { dumb_display_init(ds); } else if (vnc_display != -1) { - vnc_display_init(ds, vnc_display); + vnc_display = vnc_display_init(ds, vnc_display, vncunused); if (vncviewer) vnc_start_viewer(vnc_display); xenstore_write_vncport(vnc_display); diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/vl.h Sun Aug 13 12:00:38 2006 -0400 @@ -784,7 +784,7 @@ void cocoa_display_init(DisplayState *ds void cocoa_display_init(DisplayState *ds, int full_screen); /* vnc.c */ -void vnc_display_init(DisplayState *ds, int display); +int vnc_display_init(DisplayState *ds, int display, int find_unused); int vnc_start_viewer(int port); /* ide.c */ @@ -924,6 +924,9 @@ extern int acpi_enabled; extern int acpi_enabled; void piix4_pm_init(PCIBus *bus, int devfn); void acpi_bios_init(void); + +/* piix4acpi.c */ +extern void pci_piix4_acpi_init(PCIBus *bus, int devfn); /* pc.c */ extern QEMUMachine pc_machine; diff -r bb510c274af8 -r 5f92043a3ab1 tools/ioemu/vnc.c --- a/tools/ioemu/vnc.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/ioemu/vnc.c Sun Aug 13 12:00:38 2006 -0400 @@ -1183,7 +1183,7 @@ static void vnc_listen_read(void *opaque } } -void vnc_display_init(DisplayState *ds, int display) +int vnc_display_init(DisplayState *ds, int display, int find_unused) { struct sockaddr_in addr; int reuse_addr, ret; @@ -1213,10 +1213,6 @@ void vnc_display_init(DisplayState *ds, fprintf(stderr, "Could not create socket\n"); exit(1); } - - addr.sin_family = AF_INET; - addr.sin_port = htons(5900 + display); - memset(&addr.sin_addr, 0, sizeof(addr.sin_addr)); reuse_addr = 1; ret = setsockopt(vs->lsock, SOL_SOCKET, SO_REUSEADDR, @@ -1226,7 +1222,16 @@ void vnc_display_init(DisplayState *ds, exit(1); } + retry: + addr.sin_family = AF_INET; + addr.sin_port = htons(5900 + display); + memset(&addr.sin_addr, 0, sizeof(addr.sin_addr)); + if (bind(vs->lsock, (struct sockaddr *)&addr, sizeof(addr)) == -1) { + if (find_unused && errno == EADDRINUSE) { + display++; + goto retry; + } fprintf(stderr, "bind() failed\n"); exit(1); } @@ -1247,6 +1252,8 @@ void vnc_display_init(DisplayState *ds, vs->ds->dpy_refresh = vnc_dpy_refresh; vnc_dpy_resize(vs->ds, 640, 400); + + return display; } int vnc_start_viewer(int port) diff -r bb510c274af8 -r 5f92043a3ab1 tools/libxc/ia64/xc_ia64_hvm_build.c --- a/tools/libxc/ia64/xc_ia64_hvm_build.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/libxc/ia64/xc_ia64_hvm_build.c Sun Aug 13 12:00:38 2006 -0400 @@ -6,6 +6,7 @@ #include <zlib.h> #include "xen/arch-ia64.h" #include <xen/hvm/ioreq.h> +#include <xen/hvm/params.h> static int xc_ia64_copy_to_domain_pages(int xc_handle, uint32_t domid, void* src_page, @@ -40,6 +41,31 @@ error_out: return -1; } +static void 
+xc_set_hvm_param(int handle, domid_t dom, int param, unsigned long value) +{ + DECLARE_HYPERCALL; + xen_hvm_param_t arg; + int rc; + + hypercall.op = __HYPERVISOR_hvm_op; + hypercall.arg[0] = HVMOP_set_param; + hypercall.arg[1] = (unsigned long)&arg; + + arg.domid = dom; + arg.index = param; + arg.value = value; + + if (mlock(&arg, sizeof(arg)) != 0) { + PERROR("Could not lock memory for set parameter"); + return; + } + + rc = do_xen_hypercall(handle, &hypercall); + safe_munlock(&arg, sizeof(arg)); + if (rc < 0) + PERROR("set HVM parameter failed (%d)", rc); +} #define HOB_SIGNATURE 0x3436474953424f48 // "HOBSIG64" #define GFW_HOB_START ((4UL<<30)-(14UL<<20)) // 4G - 14M @@ -568,6 +594,10 @@ setup_guest(int xc_handle, uint32_t dom, goto error_out; } + xc_set_hvm_param(xc_handle, dom, + HVM_PARAM_STORE_PFN, STORE_PAGE_START>>PAGE_SHIFT); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); + *store_mfn = page_array[1]; sp = (shared_iopage_t *)xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[0]); diff -r bb510c274af8 -r 5f92043a3ab1 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/libxc/xc_hvm_build.c Sun Aug 13 12:00:38 2006 -0400 @@ -26,6 +26,7 @@ #define E820_IO 16 #define E820_SHARED_PAGE 17 #define E820_XENSTORE 18 +#define E820_BUFFERED_IO 19 #define E820_MAP_PAGE 0x00090000 #define E820_MAP_NR_OFFSET 0x000001E8 @@ -96,7 +97,13 @@ static void build_e820map(void *e820_pag e820entry[nr_map].type = E820_RESERVED; nr_map++; -#define STATIC_PAGES 2 /* for ioreq_t and store_mfn */ +#define STATIC_PAGES 3 + /* 3 static pages: + * - ioreq buffer. + * - xenstore. + * - shared_page. + */ + /* Most of the ram goes here */ e820entry[nr_map].addr = 0x100000; e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES * PAGE_SIZE; @@ -104,6 +111,12 @@ static void build_e820map(void *e820_pag nr_map++; /* Statically allocated special pages */ + + /* For buffered IO requests */ + e820entry[nr_map].addr = mem_size - 3 * PAGE_SIZE; + e820entry[nr_map].size = PAGE_SIZE; + e820entry[nr_map].type = E820_BUFFERED_IO; + nr_map++; /* For xenstore */ e820entry[nr_map].addr = mem_size - 2 * PAGE_SIZE; @@ -154,7 +167,7 @@ static void set_hvm_info_checksum(struct */ static int set_hvm_info(int xc_handle, uint32_t dom, xen_pfn_t *pfn_list, unsigned int vcpus, - unsigned int pae, unsigned int acpi, unsigned int apic) + unsigned int acpi, unsigned int apic) { char *va_map; struct hvm_info_table *va_hvm; @@ -172,8 +185,6 @@ static int set_hvm_info(int xc_handle, u strncpy(va_hvm->signature, "HVM INFO", 8); va_hvm->length = sizeof(struct hvm_info_table); va_hvm->acpi_enabled = acpi; - va_hvm->apic_enabled = apic; - va_hvm->pae_enabled = pae; va_hvm->nr_vcpus = vcpus; set_hvm_info_checksum(va_hvm); @@ -181,7 +192,6 @@ static int set_hvm_info(int xc_handle, u munmap(va_map, PAGE_SIZE); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae); return 0; } @@ -213,6 +223,9 @@ static int setup_guest(int xc_handle, unsigned long shared_page_frame = 0; shared_iopage_t *sp; + unsigned long ioreq_buffer_frame = 0; + void *ioreq_buffer_page; + memset(&dsi, 0, sizeof(struct domain_setup_info)); if ( (parseelfimage(image, image_size, &dsi)) != 0 ) @@ -269,11 +282,13 @@ static int setup_guest(int xc_handle, goto error_out; } - if ( set_hvm_info(xc_handle, dom, page_array, vcpus, pae, acpi, apic) ) + if ( set_hvm_info(xc_handle, dom, page_array, vcpus, 
acpi, apic) ) { ERROR("Couldn't set hvm info for HVM guest.\n"); goto error_out; } + + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae); if ( (e820_page = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, @@ -288,33 +303,33 @@ static int setup_guest(int xc_handle, xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, shared_info_frame)) == 0 ) goto error_out; - memset(shared_info, 0, sizeof(shared_info_t)); + memset(shared_info, 0, PAGE_SIZE); /* Mask all upcalls... */ for ( i = 0; i < MAX_VIRT_CPUS; i++ ) shared_info->vcpu_info[i].evtchn_upcall_mask = 1; munmap(shared_info, PAGE_SIZE); - /* Populate the event channel port in the shared page */ + /* Paranoia */ shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1]; if ( (sp = (shared_iopage_t *) xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, shared_page_frame)) == 0 ) goto error_out; memset(sp, 0, PAGE_SIZE); - - /* FIXME: how about if we overflow the page here? */ - for ( i = 0; i < vcpus; i++ ) { - unsigned int vp_eport; - - vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0); - if ( vp_eport < 0 ) { - PERROR("Couldn't get unbound port from VMX guest.\n"); - goto error_out; - } - sp->vcpu_iodata[i].vp_eport = vp_eport; - } - munmap(sp, PAGE_SIZE); + + /* clean the buffered IO requests page */ + ioreq_buffer_frame = page_array[(v_end >> PAGE_SHIFT) - 3]; + ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, + ioreq_buffer_frame); + + if ( ioreq_buffer_page == NULL ) + goto error_out; + + memset(ioreq_buffer_page, 0, PAGE_SIZE); + + munmap(ioreq_buffer_page, PAGE_SIZE); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, (v_end >> PAGE_SHIFT) - 2); xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); diff -r bb510c274af8 -r 5f92043a3ab1 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/libxc/xc_linux_build.c Sun Aug 13 12:00:38 2006 -0400 @@ -593,7 +593,7 @@ static int setup_guest(int xc_handle, xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame); printf("shared_info = %p, err=%s frame=%lx\n", shared_info, strerror (errno), shared_info_frame); - //memset(shared_info, 0, sizeof(shared_info_t)); + //memset(shared_info, 0, PAGE_SIZE); /* Mask all upcalls... */ for ( i = 0; i < MAX_VIRT_CPUS; i++ ) shared_info->vcpu_info[i].evtchn_upcall_mask = 1; @@ -1064,7 +1064,7 @@ static int setup_guest(int xc_handle, /* shared_info page starts its life empty. */ shared_info = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame); - memset(shared_info, 0, sizeof(shared_info_t)); + memset(shared_info, 0, PAGE_SIZE); /* Mask all upcalls... */ for ( i = 0; i < MAX_VIRT_CPUS; i++ ) shared_info->vcpu_info[i].evtchn_upcall_mask = 1; @@ -1373,10 +1373,10 @@ int xc_linux_build(int xc_handle, error_out: free(image); - if ( fd >= 0 ) + if ( initrd_info.type == INITRD_file && initrd_info.u.file_handle ) + gzclose(initrd_info.u.file_handle); + else if ( fd >= 0 ) close(fd); - if ( initrd_info.u.file_handle ) - gzclose(initrd_info.u.file_handle); return sts; } diff -r bb510c274af8 -r 5f92043a3ab1 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/libxc/xc_linux_restore.c Sun Aug 13 12:00:38 2006 -0400 @@ -737,7 +737,7 @@ int xc_linux_restore(int xc_handle, int /* Copy saved contents of shared-info page. No checking needed. 
*/ page = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame); - memcpy(page, shared_info, sizeof(shared_info_t)); + memcpy(page, shared_info, PAGE_SIZE); munmap(page, PAGE_SIZE); /* Uncanonicalise the pfn-to-mfn table frame-number list. */ diff -r bb510c274af8 -r 5f92043a3ab1 tools/pygrub/src/GrubConf.py --- a/tools/pygrub/src/GrubConf.py Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/pygrub/src/GrubConf.py Sun Aug 13 12:00:38 2006 -0400 @@ -1,7 +1,7 @@ # # GrubConf.py - Simple grub.conf parsing # -# Copyright 2005 Red Hat, Inc. +# Copyright 2005-2006 Red Hat, Inc. # Jeremy Katz <katzj@xxxxxxxxxx> # # This software may be freely redistributed under the terms of the GNU @@ -16,7 +16,6 @@ import logging import logging def grub_split(s, maxsplit = -1): - """Split a grub option screen separated with either '=' or whitespace.""" eq = s.find('=') if eq == -1: return s.split(None, maxsplit) @@ -31,6 +30,12 @@ def grub_split(s, maxsplit = -1): return s.split('=', maxsplit) else: return s.split(None, maxsplit) + +def grub_exact_split(s, num): + ret = grub_split(s, num - 1) + if len(ret) < num: + return ret + [""] * (num - len(ret)) + return ret def get_path(s): """Returns a tuple of (GrubDiskPart, path) corresponding to string.""" @@ -75,25 +80,39 @@ class GrubDiskPart(object): class GrubImage(object): def __init__(self, lines): - self._root = self._initrd = self._kernel = self._args = None - for l in lines: - (com, arg) = grub_split(l, 1) - - if self.commands.has_key(com): - if self.commands[com] is not None: - exec("%s = r\"%s\"" %(self.commands[com], arg.strip())) - else: - logging.info("Ignored image directive %s" %(com,)) - else: - logging.warning("Unknown image directive %s" %(com,)) + self.reset(lines) def __repr__(self): return ("title: %s\n" " root: %s\n" " kernel: %s\n" " args: %s\n" - " initrd: %s" %(self.title, self.root, self.kernel, + " initrd: %s\n" %(self.title, self.root, self.kernel, self.args, self.initrd)) + + def reset(self, lines): + self._root = self._initrd = self._kernel = self._args = None + self.title = "" + self.lines = [] + map(self.set_from_line, lines) + + def set_from_line(self, line, replace = None): + (com, arg) = grub_exact_split(line, 2) + + if self.commands.has_key(com): + if self.commands[com] is not None: + exec("%s = r\"%s\"" %(self.commands[com], arg.strip())) + else: + logging.info("Ignored image directive %s" %(com,)) + else: + logging.warning("Unknown image directive %s" %(com,)) + + # now put the line in the list of lines + if replace is None: + self.lines.append(line) + else: + self.lines.pop(replace) + self.lines.insert(replace, line) def set_root(self, val): self._root = GrubDiskPart(val) @@ -137,6 +156,7 @@ class GrubConfigFile(object): self.filename = fn self.images = [] self.timeout = -1 + self._default = 0 if fn is not None: self.parse() @@ -164,7 +184,7 @@ class GrubConfigFile(object): # new image if l.startswith("title"): if len(img) > 0: - self.images.append(GrubImage(img)) + self.add_image(GrubImage(img)) img = [l] continue @@ -172,12 +192,7 @@ class GrubConfigFile(object): img.append(l) continue - try: - (com, arg) = grub_split(l, 1) - except ValueError: - com = l - arg = "" - + (com, arg) = grub_exact_split(l, 2) if self.commands.has_key(com): if self.commands[com] is not None: exec("%s = r\"%s\"" %(self.commands[com], arg.strip())) @@ -187,7 +202,20 @@ class GrubConfigFile(object): logging.warning("Unknown directive %s" %(com,)) if len(img) > 0: - self.images.append(GrubImage(img)) + self.add_image(GrubImage(img)) + 
+ def set(self, line): + (com, arg) = grub_exact_split(line, 2) + if self.commands.has_key(com): + if self.commands[com] is not None: + exec("%s = r\"%s\"" %(self.commands[com], arg.strip())) + else: + logging.info("Ignored directive %s" %(com,)) + else: + logging.warning("Unknown directive %s" %(com,)) + + def add_image(self, image): + self.images.append(image) def _get_default(self): return self._default diff -r bb510c274af8 -r 5f92043a3ab1 tools/pygrub/src/pygrub --- a/tools/pygrub/src/pygrub Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/pygrub/src/pygrub Sun Aug 13 12:00:38 2006 -0400 @@ -2,7 +2,7 @@ # # pygrub - simple python-based bootloader for Xen # -# Copyright 2005 Red Hat, Inc. +# Copyright 2005-2006 Red Hat, Inc. # Jeremy Katz <katzj@xxxxxxxxxx> # # This software may be freely redistributed under the terms of the GNU @@ -14,9 +14,10 @@ # import os, sys, string, struct, tempfile +import copy import logging -import curses, _curses, curses.wrapper +import curses, _curses, curses.wrapper, curses.textpad, curses.ascii import getopt sys.path = [ '/usr/lib/python' ] + sys.path @@ -24,122 +25,387 @@ import grub.GrubConf import grub.GrubConf import grub.fsys -PYGRUB_VER = 0.3 - - -def draw_window(): - stdscr = curses.initscr() - if hasattr(curses, 'use_default_colors'): - curses.use_default_colors() - try: - curses.curs_set(0) - except _curses.error: - pass - - stdscr.addstr(1, 4, "pyGRUB version %s" %(PYGRUB_VER,)) - - win = curses.newwin(10, 74, 2, 1) - win.box() - win.refresh() - - stdscr.addstr(12, 5, "Use the U and D keys to select which entry is highlighted.") - stdscr.addstr(13, 5, "Press enter to boot the selected OS. 'e' to edit the") - stdscr.addstr(14, 5, "commands before booting, 'a' to modify the kernel arguments ") - stdscr.addstr(15, 5, "before booting, or 'c' for a command line.") - stdscr.addch(12, 13, curses.ACS_UARROW) - stdscr.addch(12, 19, curses.ACS_DARROW) - (y, x) = stdscr.getmaxyx() - stdscr.move(y - 1, x - 1) - - stdscr.refresh() - return (stdscr, win) - -def fill_entries(win, cfg, selected): - y = 0 - - for i in cfg.images: - if (0, y) > win.getmaxyx(): - break - if y == selected: - attr = curses.A_REVERSE - else: - attr = 0 - win.addstr(y + 1, 2, i.title.ljust(70), attr) - y += 1 - win.refresh() - -def select(win, line): - win.attron(curses.A_REVERSE) - win.redrawln(line + 1, 1) - win.refresh() +PYGRUB_VER = 0.4 def is_disk_image(file): fd = os.open(file, os.O_RDONLY) buf = os.read(fd, 512) os.close(fd) - if len(buf) >= 512 and struct.unpack("H", buf[0x1fe: 0x200]) == (0xaa55,): + if len(buf) >= 512 and \ + struct.unpack("H", buf[0x1fe: 0x200]) == (0xaa55,): return True return False SECTOR_SIZE=512 def get_active_offset(file): - """Find the offset for the start of the first active partition in the - disk image file.""" + """Find the offset for the start of the first active partition " + "in the disk image file.""" + fd = os.open(file, os.O_RDONLY) buf = os.read(fd, 512) for poff in (446, 462, 478, 494): # partition offsets # active partition has 0x80 as the first byte if struct.unpack("<c", buf[poff:poff+1]) == ('\x80',): - return struct.unpack("<L", buf[poff+8:poff+12])[0] * SECTOR_SIZE - return -1 - -def get_config(fn, isconfig = False): - if not os.access(fn, os.R_OK): - raise RuntimeError, "Unable to access %s" %(fn,) - - cf = grub.GrubConf.GrubConfigFile() - - if isconfig: - # set the config file and parse it - cf.filename = fn - cf.parse() - return cf - - offset = 0 - if is_disk_image(fn): - offset = get_active_offset(fn) - if offset == -1: - raise 
RuntimeError, "Unable to find active partition on disk" - - # open the image and read the grub config - fs = None - for fstype in grub.fsys.fstypes.values(): - if fstype.sniff_magic(fn, offset): - fs = fstype.open_fs(fn, offset) + return struct.unpack("<L", + buf[poff+8:poff+12])[0] * SECTOR_SIZE + + # if there's not a partition marked as active, fall back to + # the first partition + P1 = 446 + return struct.unpack("<L", buf[P1+8:P1+12])[0] * SECTOR_SIZE + +class GrubLineEditor(curses.textpad.Textbox): + def __init__(self, screen, startx, starty, line = ""): + screen.addstr(startx, starty, "> ") + screen.refresh() + win = curses.newwin(1, 74, startx, starty + 2) + curses.textpad.Textbox.__init__(self, win) + + self.line = list(line) + self.pos = len(line) + self.cancelled = False + self.show_text() + + def show_text(self): + """Show the text. One of our advantages over standard textboxes + is that we can handle lines longer than the window.""" + + self.win.clear() + if self.pos > 70: + if self.pos > 130: + off = 120 + else: + off = 55 + l = [ "<" ] + self.line[off:] + p = self.pos - off + else: + l = self.line[:70] + p = self.pos + self.win.addstr(0, 0, string.join(l, (""))) + if self.pos > 70: + self.win.addch(0, 0, curses.ACS_LARROW) + + self.win.move(0, p) + + def do_command(self, ch): + # we handle escape as well as moving the line around, so have + # to override some of the default handling + + self.lastcmd = ch + if ch == 27: # esc + self.cancelled = True + return 0 + elif curses.ascii.isprint(ch): + self.line.insert(self.pos, chr(ch)) + self.pos += 1 + elif ch == curses.ascii.SOH: # ^a + self.pos = 0 + elif ch in (curses.ascii.STX,curses.KEY_LEFT): + self.pos -= 1 + elif ch in (curses.ascii.BS,curses.KEY_BACKSPACE): + if self.pos > 0: + self.pos -= 1 + self.line.pop(self.pos) + elif ch == curses.ascii.EOT: # ^d + self.line.pop(self.pos) + elif ch == curses.ascii.ENQ: # ^e + self.pos = len(self.line) + elif ch in (curses.ascii.ACK, curses.KEY_RIGHT): + self.pos +=1 + elif ch == curses.ascii.VT: # ^k + self.line = self.line[:self.pos] + else: + return curses.textpad.Textbox.do_command(self, ch) + self.show_text() + return 1 + + def edit(self): + r = curses.textpad.Textbox.edit(self) + if self.cancelled: + return None + return string.join(self.line, "") + + +class Grub: + def __init__(self, file, isconfig = False): + self.screen = None + self.entry_win = None + self.text_win = None + if file: + self.read_config(file, isconfig) + + def draw_main_windows(self): + if self.screen is None: #only init stuff once + self.screen = curses.initscr() + self.screen.timeout(1000) + if hasattr(curses, 'use_default_colors'): + curses.use_default_colors() + try: + curses.curs_set(0) + except _curses.error: + pass + self.entry_win = curses.newwin(10, 74, 2, 1) + self.text_win = curses.newwin(10, 70, 12, 5) + + self.screen.clear() + self.screen.refresh() + + # create basic grub screen with a box of entries and a textbox + self.screen.addstr(1, 4, "pyGRUB version %s" %(PYGRUB_VER,)) + self.entry_win.box() + self.screen.refresh() + + def fill_entry_list(self): + self.entry_win.clear() + self.entry_win.box() + for y in range(0, len(self.cf.images)): + i = self.cf.images[y] + if (0, y) > self.entry_win.getmaxyx(): + break + if y == self.selected_image: + attr = curses.A_REVERSE + else: + attr = 0 + self.entry_win.addstr(y + 1, 2, i.title.ljust(70), attr) + self.entry_win.refresh() + + def edit_entry(self, origimg): + def draw(): + self.draw_main_windows() + + self.text_win.addstr(0, 0, "Use the U and D keys 
to select which entry is highlighted.") + self.text_win.addstr(1, 0, "Press 'b' to boot, 'e' to edit the selected command in the") + self.text_win.addstr(2, 0, "boot sequence, 'c' for a command-line, 'o' to open a new line") + self.text_win.addstr(3, 0, "after ('O' for before) the selected line, 'd' to remove the") + self.text_win.addstr(4, 0, "selected line, or escape to go back to the main menu.") + self.text_win.addch(0, 8, curses.ACS_UARROW) + self.text_win.addch(0, 14, curses.ACS_DARROW) + (y, x) = self.text_win.getmaxyx() + self.text_win.move(y - 1, x - 1) + self.text_win.refresh() + + curline = 1 + img = copy.deepcopy(origimg) + while 1: + draw() + self.entry_win.clear() + self.entry_win.box() + for idx in range(1, len(img.lines)): + # current line should be highlighted + attr = 0 + if idx == curline: + attr = curses.A_REVERSE + + # trim the line + l = img.lines[idx].ljust(70) + if len(l) > 70: + l = l[:69] + ">" + + self.entry_win.addstr(idx, 2, l, attr) + self.entry_win.refresh() + + c = self.screen.getch() + if c in (ord('q'), 27): # 27 == esc + break + elif c == curses.KEY_UP: + curline -= 1 + elif c == curses.KEY_DOWN: + curline += 1 + elif c == ord('b'): + self.isdone = True + break + elif c == ord('e'): + l = self.edit_line(img.lines[curline]) + if l is not None: + img.set_from_line(l, replace = curline) + elif c == ord('d'): + img.lines.pop(curline) + elif c == ord('o'): + img.lines.insert(curline+1, "") + curline += 1 + elif c == ord('O'): + img.lines.insert(curline, "") + elif c == ord('c'): + self.command_line_mode() + if self.isdone: + return + + # bound at the top and bottom + if curline < 1: + curline = 1 + elif curline >= len(img.lines): + curline = len(img.lines) - 1 + + if self.isdone: + origimg.reset(img.lines) + + def edit_line(self, line): + self.screen.clear() + self.screen.addstr(1, 2, "[ Minimal BASH-like line editing is supported. ") + self.screen.addstr(2, 2, " ESC at any time cancels. ENTER at any time accepts your changes. ]") + self.screen.refresh() + + t = GrubLineEditor(self.screen, 5, 2, line) + ret = t.edit() + if ret: + return ret + return None + + def command_line_mode(self): + self.screen.clear() + self.screen.addstr(1, 2, "[ Minimal BASH-like line editing is supported. ESC at any time ") + self.screen.addstr(2, 2, " exits. Typing 'boot' will boot with your entered commands. ] ") + self.screen.refresh() + + y = 5 + lines = [] + while 1: + t = GrubLineEditor(self.screen, y, 2) + ret = t.edit() + if ret: + if ret in ("quit", "return"): + break + elif ret != "boot": + y += 1 + lines.append(ret) + continue + + # if we got boot, then we want to boot the entered image + img = grub.GrubConf.GrubImage(lines) + self.cf.add_image(img) + self.selected_image = len(self.cf.images) - 1 + self.isdone = True + break + + # else, we cancelled and should just go back break - if fs is not None: - grubfile = None - for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", - "/grub/menu.lst", "/grub/grub.conf"): - if fs.file_exist(f): - grubfile = f - break - if grubfile is None: - raise RuntimeError, "we couldn't find /boot/grub{menu.lst,grub.conf} " + \ - "in the image provided. halt!" - f = fs.open_file(grubfile) - buf = f.read() - f.close() - fs.close() - # then parse the grub config - cf.parse(buf) - else: - raise RuntimeError, "Unable to read filesystem" - - return cf - + def read_config(self, fn, isConfig = False): + """Read the given file to parse the config. 
If isconfig, then + we're being given a raw config file rather than a disk image.""" + + if not os.access(fn, os.R_OK): + raise RuntimeError, "Unable to access %s" %(fn,) + + self.cf = grub.GrubConf.GrubConfigFile() + + if isConfig: + # set the config file and parse it + self.cf.filename = fn + self.cf.parse() + return + + offset = 0 + if is_disk_image(fn): + offset = get_active_offset(fn) + if offset == -1: + raise RuntimeError, "Unable to find active partition on disk" + + # open the image and read the grub config + fs = None + for fstype in grub.fsys.fstypes.values(): + if fstype.sniff_magic(fn, offset): + fs = fstype.open_fs(fn, offset) + break + + if fs is not None: + grubfile = None + for f in ("/boot/grub/menu.lst", "/boot/grub/grub.conf", + "/grub/menu.lst", "/grub/grub.conf"): + if fs.file_exist(f): + grubfile = f + break + if grubfile is None: + raise RuntimeError, "we couldn't find grub config file in the image provided." + f = fs.open_file(grubfile) + buf = f.read() + f.close() + fs.close() + # then parse the grub config + self.cf.parse(buf) + else: + raise RuntimeError, "Unable to read filesystem" + + def run(self): + timeout = int(self.cf.timeout) + + self.selected_image = self.cf.default + self.isdone = False + while not self.isdone: + self.run_main(timeout) + timeout = -1 + + return self.selected_image + + def run_main(self, timeout = -1): + def draw(): + # set up the screen + self.draw_main_windows() + self.text_win.addstr(0, 0, "Use the U and D keys to select which entry is highlighted.") + self.text_win.addstr(1, 0, "Press enter to boot the selected OS. 'e' to edit the") + self.text_win.addstr(2, 0, "commands before booting, 'a' to modify the kernel arguments ") + self.text_win.addstr(3, 0, "before booting, or 'c' for a command line.") + self.text_win.addch(0, 8, curses.ACS_UARROW) + self.text_win.addch(0, 14, curses.ACS_DARROW) + (y, x) = self.text_win.getmaxyx() + self.text_win.move(y - 1, x - 1) + self.text_win.refresh() + + # now loop until we hit the timeout or get a go from the user + mytime = 0 + while (timeout == -1 or mytime < int(timeout)): + draw() + if timeout != -1 and mytime != -1: + self.screen.addstr(20, 5, "Will boot selected entry in %2d seconds" + %(int(timeout) - mytime)) + else: + self.screen.addstr(20, 5, " " * 80) + + self.fill_entry_list() + c = self.screen.getch() + if mytime != -1: + mytime += 1 + + # handle keypresses + if c == ord('c'): + self.command_line_mode() + break + elif c == ord('a'): + # find the kernel line, edit it and then boot + img = self.cf.images[self.selected_image] + for line in img.lines: + if line.startswith("kernel"): + l = self.edit_line(line) + if l is not None: + img.set_from_line(l, replace = True) + self.isdone = True + break + break + elif c == ord('e'): + img = self.cf.images[self.selected_image] + self.edit_entry(img) + break + elif c in (curses.KEY_ENTER, ord('\n'), ord('\r')): + self.isdone = True + break + elif c == curses.KEY_UP: + mytime = -1 + self.selected_image -= 1 + elif c == curses.KEY_DOWN: + mytime = -1 + self.selected_image += 1 +# elif c in (ord('q'), 27): # 27 == esc +# self.selected_image = -1 +# self.isdone = True +# break + + # bound at the top and bottom + if self.selected_image < 0: + self.selected_image = 0 + elif self.selected_image >= len(self.cf.images): + self.selected_image = len(self.cf.images) - 1 + def get_entry_idx(cf, entry): # first, see if the given entry is numeric try: @@ -155,63 +421,12 @@ def get_entry_idx(cf, entry): return None -def main(cf = None): - mytime = 0 - timeout = 
int(cf.timeout) - - (stdscr, win) = draw_window() - stdscr.timeout(1000) - selected = cf.default - - while (timeout == -1 or mytime < int(timeout)): - if timeout != -1 and mytime != -1: - stdscr.addstr(20, 5, "Will boot selected entry in %2d seconds" - %(int(timeout) - mytime)) - else: - stdscr.addstr(20, 5, " " * 80) - - fill_entries(win, cf, selected) - c = stdscr.getch() - if mytime != -1: - mytime += 1 -# if c == ord('q'): -# selected = -1 -# break - if c == ord('c'): - # FIXME: needs to go to command line mode - continue - elif c == ord('a'): - # FIXME: needs to go to append mode - continue - elif c == ord('e'): - # FIXME: needs to go to edit mode - continue - elif c in (curses.KEY_ENTER, ord('\n'), ord('\r')): - break - elif c == curses.KEY_UP: - mytime = -1 - selected -= 1 - elif c == curses.KEY_DOWN: - mytime = -1 - selected += 1 - else: - pass - - # bound at the top and bottom - if selected < 0: - selected = 0 - elif selected >= len(cf.images): - selected = len(cf.images) - 1 - - if selected >= 0: - return selected - if __name__ == "__main__": sel = None def run_main(scr, *args): global sel - sel = main(cf) + sel = g.run() def usage(): print >> sys.stderr, "Usage: %s [-q|--quiet] [--output=] [--entry=] <image>" %(sys.argv[0],) @@ -253,24 +468,32 @@ if __name__ == "__main__": else: fd = os.open(output, os.O_WRONLY) - cf = get_config(file, isconfig) + g = Grub(file, isconfig) if interactive: curses.wrapper(run_main) else: - sel = cf.default + sel = g.cf.default # set the entry to boot as requested if entry is not None: - idx = get_entry_idx(cf, entry) - if idx is not None and idx > 0 and idx < len(cf.images): + idx = get_entry_idx(g.cf, entry) + if idx is not None and idx > 0 and idx < len(g.cf.images): sel = idx - img = cf.images[sel] + if sel == -1: + print "No kernel image selected!" 
+ sys.exit(1) + + img = g.cf.images[sel] print "Going to boot %s" %(img.title) print " kernel: %s" %(img.kernel[1],) if img.initrd: print " initrd: %s" %(img.initrd[1],) + if isconfig: + print " args: %s" %(img.args,) + sys.exit(0) + offset = 0 if is_disk_image(file): offset = get_active_offset(file) @@ -288,14 +511,14 @@ if __name__ == "__main__": raise RuntimeError, "Unable to open filesystem" kernel = fs.open_file(img.kernel[1],).read() - (tfd, fn) = tempfile.mkstemp(prefix="vmlinuz.") + (tfd, fn) = tempfile.mkstemp(prefix="vmlinuz.", dir="/var/lib/xen") os.write(tfd, kernel) os.close(tfd) sxp = "linux (kernel %s)" %(fn,) if img.initrd: initrd = fs.open_file(img.initrd[1],).read() - (tfd, fn) = tempfile.mkstemp(prefix="initrd.") + (tfd, fn) = tempfile.mkstemp(prefix="initrd.", dir="/var/lib/xen") os.write(tfd, initrd) os.close(tfd) sxp += "(ramdisk %s)" %(fn,) diff -r bb510c274af8 -r 5f92043a3ab1 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/python/xen/lowlevel/xc/xc.c Sun Aug 13 12:00:38 2006 -0400 @@ -165,8 +165,8 @@ static PyObject *pyxc_vcpu_setaffinity(X } static PyObject *pyxc_domain_setcpuweight(XcObject *self, - PyObject *args, - PyObject *kwds) + PyObject *args, + PyObject *kwds) { uint32_t dom; float cpuweight = 1; @@ -334,29 +334,29 @@ static PyObject *pyxc_linux_build(XcObje static char *kwd_list[] = { "dom", "store_evtchn", "console_evtchn", "image", - /* optional */ - "ramdisk", "cmdline", "flags", - "features", NULL }; + /* optional */ + "ramdisk", "cmdline", "flags", + "features", NULL }; if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssis", kwd_list, &dom, &store_evtchn, - &console_evtchn, &image, - /* optional */ - &ramdisk, &cmdline, &flags, - &features) ) + &console_evtchn, &image, + /* optional */ + &ramdisk, &cmdline, &flags, + &features) ) return NULL; if ( xc_linux_build(self->xc_handle, dom, image, ramdisk, cmdline, features, flags, store_evtchn, &store_mfn, - console_evtchn, &console_mfn) != 0 ) { + console_evtchn, &console_mfn) != 0 ) { if (!errno) errno = EINVAL; return PyErr_SetFromErrno(xc_error); } return Py_BuildValue("{s:i,s:i}", - "store_mfn", store_mfn, - "console_mfn", console_mfn); + "store_mfn", store_mfn, + "console_mfn", console_mfn); } static PyObject *pyxc_hvm_build(XcObject *self, @@ -373,16 +373,16 @@ static PyObject *pyxc_hvm_build(XcObject int apic = 0; unsigned long store_mfn = 0; - static char *kwd_list[] = { "dom", "store_evtchn", - "memsize", "image", "vcpus", "pae", "acpi", "apic", - NULL }; + static char *kwd_list[] = { "dom", "store_evtchn", "memsize", "image", + "vcpus", "pae", "acpi", "apic", + NULL }; if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiisiiii", kwd_list, &dom, &store_evtchn, &memsize, &image, &vcpus, &pae, &acpi, &apic) ) return NULL; if ( xc_hvm_build(self->xc_handle, dom, memsize, image, - vcpus, pae, acpi, apic, store_evtchn, &store_mfn) != 0 ) + vcpus, pae, acpi, apic, store_evtchn, &store_mfn) != 0 ) return PyErr_SetFromErrno(xc_error); return Py_BuildValue("{s:i}", "store_mfn", store_mfn); @@ -613,7 +613,7 @@ static PyObject *pyxc_physinfo(XcObject { p+=sprintf(p,"%08x:",info.hw_cap[i]); if(info.hw_cap[i]) - q=p; + q=p; } if(q>cpu_cap) *(q-1)=0; @@ -718,8 +718,8 @@ static PyObject *pyxc_sedf_domain_get(Xc "domain", domid, "period", period, "slice", slice, - "latency", latency, - "extratime", extratime, + "latency", latency, + "extratime", extratime, "weight", weight); } @@ -782,8 +782,8 @@ static PyObject *pyxc_domain_setmaxmem(X } 
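
The pyxc_linux_build and pyxc_hvm_build hunks above are whitespace-only, but the kwd_list arrays they touch spell out the calling convention of these bindings. A minimal sketch of the Python side, assuming the xen.lowlevel.xc import path and method names used elsewhere in the tools tree, with purely illustrative domain, event-channel and image values:

    import xen.lowlevel.xc

    xc = xen.lowlevel.xc.xc()

    # pyxc_hvm_build: format "iiisiiii", keywords as in kwd_list above
    hvm = xc.hvm_build(dom=3, store_evtchn=1, memsize=128,
                       image="/usr/lib/xen/boot/hvmloader",
                       vcpus=1, pae=0, acpi=1, apic=0)
    print hvm['store_mfn']          # dict built by Py_BuildValue("{s:i}", ...)

    # pyxc_linux_build: format "iiis|ssis", trailing keywords optional
    lin = xc.linux_build(dom=4, store_evtchn=1, console_evtchn=2,
                         image="/boot/vmlinuz-2.6.16-xenU",
                         ramdisk="/boot/initrd-2.6.16-xenU.img",
                         cmdline="root=/dev/sda1 ro")
    print lin['store_mfn'], lin['console_mfn']
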
static PyObject *pyxc_domain_memory_increase_reservation(XcObject *self, - PyObject *args, - PyObject *kwds) + PyObject *args, + PyObject *kwds) { uint32_t dom; unsigned long mem_kb; @@ -800,8 +800,8 @@ static PyObject *pyxc_domain_memory_incr know what they are doing */ nr_extents = (mem_kb / (XC_PAGE_SIZE/1024)) >> extent_order; if ( xc_domain_memory_increase_reservation(self->xc_handle, dom, - nr_extents, extent_order, - address_bits, NULL) ) + nr_extents, extent_order, + address_bits, NULL) ) return PyErr_SetFromErrno(xc_error); Py_INCREF(zero); diff -r bb510c274af8 -r 5f92043a3ab1 tools/python/xen/util/xmlrpclib2.py --- a/tools/python/xen/util/xmlrpclib2.py Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/python/xen/util/xmlrpclib2.py Sun Aug 13 12:00:38 2006 -0400 @@ -138,6 +138,11 @@ class TCPXMLRPCServer(SocketServer.Threa def _marshaled_dispatch(self, data, dispatch_method = None): params, method = xmlrpclib.loads(data) + if False: + # Enable this block of code to exit immediately without sending + # a response. This allows you to test client-side crash handling. + import sys + sys.exit(1) try: if dispatch_method is not None: response = dispatch_method(method, params) diff -r bb510c274af8 -r 5f92043a3ab1 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/python/xen/xend/XendDomain.py Sun Aug 13 12:00:38 2006 -0400 @@ -402,9 +402,9 @@ class XendDomain: val = dominfo.destroy() else: try: - val = xc.domain_destroy(domid) + val = xc.domain_destroy(int(domid)) except Exception, ex: - raise XendError(str(ex)) + raise XendInvalidDomain(str(domid)) return val def domain_migrate(self, domid, dst, live=False, resource=0, port=0): diff -r bb510c274af8 -r 5f92043a3ab1 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/python/xen/xend/image.py Sun Aug 13 12:00:38 2006 -0400 @@ -251,7 +251,7 @@ class HVMImageHandler(ImageHandler): def parseDeviceModelArgs(self, imageConfig, deviceConfig): dmargs = [ 'boot', 'fda', 'fdb', 'soundhw', 'localtime', 'serial', 'stdvga', 'isa', 'vcpus', - 'acpi', 'usb', 'usbdevice'] + 'acpi', 'usb', 'usbdevice'] ret = [] for a in dmargs: v = sxp.child_value(imageConfig, a) @@ -260,7 +260,7 @@ class HVMImageHandler(ImageHandler): if a == 'stdvga': a = 'std-vga' # Handle booleans gracefully - if a in ['localtime', 'std-vga', 'isa', 'usb']: + if a in ['localtime', 'std-vga', 'isa', 'usb', 'acpi']: if v != None: v = int(v) if v: ret.append("-%s" % a) else: @@ -305,8 +305,6 @@ class HVMImageHandler(ImageHandler): def configVNC(self, config): # Handle graphics library related options vnc = sxp.child_value(config, 'vnc') - vncdisplay = sxp.child_value(config, 'vncdisplay', - int(self.vm.getDomid())) sdl = sxp.child_value(config, 'sdl') ret = [] nographic = sxp.child_value(config, 'nographic') @@ -314,7 +312,12 @@ class HVMImageHandler(ImageHandler): ret.append('-nographic') return ret if vnc: + vncdisplay = sxp.child_value(config, 'vncdisplay', + int(self.vm.getDomid())) ret = ret + ['-vnc', '%d' % vncdisplay, '-k', 'en-us'] + vncunused = sxp.child_value(config, 'vncunused') + if vncunused: + ret += ['-vncunused'] return ret def createDeviceModel(self): diff -r bb510c274af8 -r 5f92043a3ab1 tools/python/xen/xend/sxp.py --- a/tools/python/xen/xend/sxp.py Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/python/xen/xend/sxp.py Sun Aug 13 12:00:38 2006 -0400 @@ -291,7 +291,7 @@ class Parser: raise ParseError(self, "unexpected EOF") elif '0' <= c <= '7': 
octaldigit(c) - elif len(self.buf): + elif len(self.state.buf): octaldone() self.input_char(c) diff -r bb510c274af8 -r 5f92043a3ab1 tools/python/xen/xend/tests/test_sxp.py --- a/tools/python/xen/xend/tests/test_sxp.py Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/python/xen/xend/tests/test_sxp.py Sun Aug 13 12:00:38 2006 -0400 @@ -14,5 +14,26 @@ class test_sxp(unittest.TestCase): t('(String) (Thing)', [['String'], ['Thing']]) + def testParseFixed(self): + fin = file('../xen/xend/tests/xend-config.sxp', 'rb') + try: + config = xen.xend.sxp.parse(fin) + self.assertEqual( + xen.xend.sxp.child_value( + config, + 'xend-relocation-hosts-allow'), + '^localhost$ ^localhost\\.localdomain$') + finally: + fin.close() + + + def testParseConfigExample(self): + fin = file('../../examples/xend-config.sxp', 'rb') + try: + config = xen.xend.sxp.parse(fin) + finally: + fin.close() + + def test_suite(): return unittest.makeSuite(test_sxp) diff -r bb510c274af8 -r 5f92043a3ab1 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/python/xen/xm/create.py Sun Aug 13 12:00:38 2006 -0400 @@ -411,6 +411,11 @@ gopts.var('vncdisplay', val='', gopts.var('vncdisplay', val='', fn=set_value, default=None, use="""VNC display to use""") + +gopts.var('vncunused', val='', + fn=set_bool, default=1, + use="""Try to find an unused port for the VNC server. + Only valid when vnc=1.""") gopts.var('sdl', val='', fn=set_value, default=None, @@ -627,7 +632,7 @@ def configure_hvm(config_image, vals): """ args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb', 'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw', - 'vnc', 'vncdisplay', 'vncconsole', 'sdl', 'display', + 'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'sdl', 'display', 'acpi', 'apic', 'xauthority', 'usb', 'usbdevice' ] for a in args: if (vals.__dict__[a]): @@ -844,14 +849,58 @@ def choose_vnc_display(): vncpid = None +def daemonize(prog, args): + """Runs a program as a daemon with the list of arguments. Returns the PID + of the daemonized program, or returns 0 on error. + """ + r, w = os.pipe() + pid = os.fork() + + if pid == 0: + os.close(r) + w = os.fdopen(w, 'w') + os.setsid() + try: + pid2 = os.fork() + except: + pid2 = None + if pid2 == 0: + os.chdir("/") + for fd in range(0, 256): + try: + os.close(fd) + except: + pass + os.open("/dev/null", os.O_RDWR) + os.dup2(0, 1) + os.dup2(0, 2) + os.execvp(prog, args) + os._exit(1) + else: + w.write(str(pid2 or 0)) + w.close() + os._exit(0) + + os.close(w) + r = os.fdopen(r) + daemon_pid = int(r.read()) + r.close() + os.waitpid(pid, 0) + return daemon_pid + def spawn_vnc(display): + """Spawns a vncviewer that listens on the specified display. On success, + returns the port that the vncviewer is listening on and sets the global + vncpid. On failure, returns 0. Note that vncviewer is daemonized. + """ vncargs = (["vncviewer", "-log", "*:stdout:0", "-listen", "%d" % (VNC_BASE_PORT + display) ]) - global vncpid - vncpid = os.spawnvp(os.P_NOWAIT, "vncviewer", vncargs) - + global vncpid + vncpid = daemonize("vncviewer", vncargs) + if vncpid == 0: + return 0 return VNC_BASE_PORT + display - + def preprocess_vnc(vals): """If vnc was specified, spawn a vncviewer in listen mode and pass its address to the domain on the kernel command line. 
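
The daemonize()/spawn_vnc() pair above replaces the old os.spawnvp(os.P_NOWAIT, ...) call: the listening vncviewer is now detached from xm by a double fork, with the grandchild PID reported back to the parent over a pipe and stored in the global vncpid. A rough usage sketch of the new helpers, mirroring what preprocess_vnc() does with the returned port; the import style follows tests/test_create.py and the values are placeholders:

    import xen.xm.create as create

    display = create.choose_vnc_display()   # pick a display number for the listener
    port = create.spawn_vnc(display)        # 0 on error, else VNC_BASE_PORT + display
    if port:
        # handed to the guest on its kernel command line by preprocess_vnc()
        extra = "VNC_VIEWER=%s:%d" % (create.get_host_addr(), port)

Because the viewer now runs in its own session, detached from xm's terminal, it no longer dies with xm, while the make_domain() hunk below can still clean it up by PID (os.kill(vncpid, signal.SIGKILL)) if domain creation fails.
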
@@ -928,7 +977,7 @@ def make_domain(opts, config): import signal if vncpid: os.kill(vncpid, signal.SIGKILL) - raise ex + raise dom = sxp.child_value(dominfo, 'name') diff -r bb510c274af8 -r 5f92043a3ab1 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/python/xen/xm/main.py Sun Aug 13 12:00:38 2006 -0400 @@ -31,6 +31,7 @@ warnings.filterwarnings('ignore', catego warnings.filterwarnings('ignore', category=FutureWarning) import xmlrpclib import traceback +import datetime import xen.xend.XendProtocol @@ -70,6 +71,7 @@ shutdown_help ="shutdown <DomId> [-w][-a shutdown_help ="shutdown <DomId> [-w][-a][-R|-H] Shutdown a domain" top_help = "top Monitor system and domains in real-time" unpause_help = "unpause <DomId> Unpause a paused domain" +uptime_help = "uptime [-s|--short] [DomId, ...] List uptime for domains" help_spacer = """ """ @@ -149,6 +151,7 @@ short_command_list = [ "shutdown", "top", "unpause", + "uptime", "vcpu-set", ] @@ -172,6 +175,7 @@ domain_commands = [ "sysrq", "top", "unpause", + "uptime", "vcpu-list", "vcpu-pin", "vcpu-set", @@ -412,6 +416,7 @@ def parse_doms_info(info): 'vcpus' : get_info('online_vcpus', int, 0), 'state' : get_info('state', str, '??'), 'cpu_time' : get_info('cpu_time', float, 0), + 'up_time' : get_info('up_time', float, -1), 'seclabel' : security.get_security_printlabel(info), } @@ -818,6 +823,59 @@ def xm_console(args): domid = int(sxp.child_value(info, 'domid', '-1')) console.execConsole(domid) +def xm_uptime(args): + short_mode = 0 + + try: + (options, params) = getopt.gnu_getopt(args, 's', ['short']) + except getopt.GetoptError, opterr: + err(opterr) + sys.exit(1) + + for (k, v) in options: + if k in ['-s', '--short']: + short_mode = 1 + + doms = getDomains(params) + + if short_mode == 0: + print 'Name ID Uptime' + + for dom in doms: + d = parse_doms_info(dom) + if d['dom'] > 0: + uptime = int(round(d['up_time'])) + else: + f=open('/proc/uptime', 'r') + upfile = f.read() + uptime = int(round(float(upfile.split(' ')[0]))) + f.close() + + days = int(uptime / 86400) + uptime -= (days * 86400) + hours = int(uptime / 3600) + uptime -= (hours * 3600) + minutes = int(uptime / 60) + uptime -= (minutes * 60) + seconds = uptime + + upstring = "" + if days > 0: + upstring += str(days) + " day" + if days > 1: + upstring += "s" + upstring += ", " + upstring += '%(hours)2d:%(minutes)02d' % vars() + + if short_mode: + now = datetime.datetime.now() + upstring = now.strftime(" %H:%M:%S") + " up " + upstring + upstring += ", " + d['name'] + " (" + str(d['dom']) + ")" + else: + upstring += ':%(seconds)02d' % vars() + upstring = ("%(name)-32s %(dom)3d " % d) + upstring + + print upstring def xm_top(args): arg_check(args, "top", 0) @@ -1117,6 +1175,7 @@ commands = { "save": xm_save, "reboot": xm_reboot, "shutdown": xm_shutdown, + "uptime": xm_uptime, "list": xm_list, # memory commands "mem-max": xm_mem_max, @@ -1257,6 +1316,16 @@ def main(argv=sys.argv): else: print >>sys.stderr, "Error: %s" % ex.faultString sys.exit(1) + except xmlrpclib.ProtocolError, ex: + if ex.errcode == -1: + print >>sys.stderr, ( + "Xend has probably crashed! Invalid or missing HTTP " + "status code.") + else: + print >>sys.stderr, ( + "Xend has probably crashed! ProtocolError(%d, %s)." 
% + (ex.errcode, ex.errmsg)) + sys.exit(1) except (ValueError, OverflowError): err("Invalid argument.") usage(argv[1]) diff -r bb510c274af8 -r 5f92043a3ab1 tools/python/xen/xm/tests/test_create.py --- a/tools/python/xen/xm/tests/test_create.py Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/python/xen/xm/tests/test_create.py Sun Aug 13 12:00:38 2006 -0400 @@ -179,7 +179,8 @@ ne2000=0 'extra' : ('VNC_VIEWER=%s:%d ' % (xen.xm.create.get_host_addr(), - xen.xm.create.VNC_BASE_PORT + 1)), + xen.xm.create.VNC_BASE_PORT + + xen.xm.create.choose_vnc_display())), 'vnc' : 1, 'vncviewer' : 1, diff -r bb510c274af8 -r 5f92043a3ab1 tools/xenstat/libxenstat/src/xenstat.c --- a/tools/xenstat/libxenstat/src/xenstat.c Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/xenstat/libxenstat/src/xenstat.c Sun Aug 13 12:00:38 2006 -0400 @@ -103,7 +103,10 @@ struct xenstat_vbd { * Data-collection types */ /* Called to collect the information for the node and all the domains on - * it. When called, the domain information has already been collected. */ + * it. When called, the domain information has already been collected. + * Return status is 0 if fatal error occurs, 1 for success. Collectors + * may prune a domain from the list if it has been deleted between the + * time the list was setup and the time the colector is called */ typedef int (*xenstat_collect_func)(xenstat_node * node); /* Called to free the information collected by the collect function. The free * function will only be called on a xenstat_node if that node includes @@ -134,6 +137,7 @@ static void xenstat_uninit_xen_version(x static void xenstat_uninit_xen_version(xenstat_handle * handle); static void xenstat_uninit_vbds(xenstat_handle * handle); static char *xenstat_get_domain_name(xenstat_handle * handle, unsigned int domain_id); +static void xenstat_prune_domain(xenstat_node *node, unsigned int entry); static xenstat_collector collectors[] = { { XENSTAT_VCPU, xenstat_collect_vcpus, @@ -208,7 +212,7 @@ xenstat_node *xenstat_get_node(xenstat_h xenstat_node *node; dom0_physinfo_t physinfo; dom0_getdomaininfo_t domaininfo[DOMAIN_CHUNK_SIZE]; - unsigned int num_domains, new_domains; + unsigned int new_domains; unsigned int i; /* Create the node */ @@ -242,15 +246,17 @@ xenstat_node *xenstat_get_node(xenstat_h return NULL; } - num_domains = 0; + node->num_domains = 0; do { xenstat_domain *domain, *tmp; new_domains = xc_domain_getinfolist(handle->xc_handle, - num_domains, DOMAIN_CHUNK_SIZE, domaininfo); + node->num_domains, + DOMAIN_CHUNK_SIZE, + domaininfo); tmp = realloc(node->domains, - (num_domains + new_domains) + (node->num_domains + new_domains) * sizeof(xenstat_domain)); if (tmp == NULL) { free(node->domains); @@ -259,12 +265,29 @@ xenstat_node *xenstat_get_node(xenstat_h } node->domains = tmp; - domain = node->domains + num_domains; + domain = node->domains + node->num_domains; + + /* zero out newly allocated memory in case error occurs below */ + memset(domain, 0, new_domains * sizeof(xenstat_domain)); for (i = 0; i < new_domains; i++) { /* Fill in domain using domaininfo[i] */ domain->id = domaininfo[i].domain; - domain->name = xenstat_get_domain_name(handle, domaininfo[i].domain); + domain->name = xenstat_get_domain_name(handle, + domain->id); + if (domain->name == NULL) { + if (errno == ENOMEM) { + /* fatal error */ + xenstat_free_node(node); + return NULL; + } + else { + /* failed to get name -- this means the + domain is being destroyed so simply + ignore this entry */ + continue; + } + } domain->state = domaininfo[i].flags; domain->cpu_ns = 
domaininfo[i].cpu_time; domain->num_vcpus = (domaininfo[i].max_vcpu_id+1); @@ -284,10 +307,9 @@ xenstat_node *xenstat_get_node(xenstat_h domain->vbds = NULL; domain++; - } - num_domains += new_domains; + node->num_domains++; + } } while (new_domains == DOMAIN_CHUNK_SIZE); - node->num_domains = num_domains; /* Run all the extra data collectors requested */ node->flags = 0; @@ -495,10 +517,12 @@ xenstat_vbd *xenstat_domain_vbd(xenstat_ /* Collect information about VCPUs */ static int xenstat_collect_vcpus(xenstat_node * node) { - unsigned int i, vcpu; + unsigned int i, vcpu, inc_index; /* Fill in VCPU information */ - for (i = 0; i < node->num_domains; i++) { + for (i = 0; i < node->num_domains; i+=inc_index) { + inc_index = 1; /* default is to increment to next domain */ + node->domains[i].vcpus = malloc(node->domains[i].num_vcpus * sizeof(xenstat_vcpu)); if (node->domains[i].vcpus == NULL) @@ -509,11 +533,25 @@ static int xenstat_collect_vcpus(xenstat dom0_getvcpuinfo_t info; if (xc_vcpu_getinfo(node->handle->xc_handle, - node->domains[i].id, vcpu, &info) != 0) - return 0; - - node->domains[i].vcpus[vcpu].online = info.online; - node->domains[i].vcpus[vcpu].ns = info.cpu_time; + node->domains[i].id, vcpu, &info) != 0) { + if (errno == ENOMEM) { + /* fatal error */ + return 0; + } + else { + /* domain is in transition - remove + from list */ + xenstat_prune_domain(node, i); + + /* remember not to increment index! */ + inc_index = 0; + break; + } + } + else { + node->domains[i].vcpus[vcpu].online = info.online; + node->domains[i].vcpus[vcpu].ns = info.cpu_time; + } } } return 1; @@ -884,13 +922,30 @@ static char *xenstat_get_domain_name(xen static char *xenstat_get_domain_name(xenstat_handle *handle, unsigned int domain_id) { char path[80]; - char *name; snprintf(path, sizeof(path),"/local/domain/%i/name", domain_id); - name = xs_read(handle->xshandle, XBT_NULL, path, NULL); - if (name == NULL) - name = strdup(" "); - - return name; + return xs_read(handle->xshandle, XBT_NULL, path, NULL); } + +/* Remove specified entry from list of domains */ +static void xenstat_prune_domain(xenstat_node *node, unsigned int entry) +{ + /* nothing to do if array is empty or entry is beyond end */ + if (node->num_domains == 0 || entry >= node->num_domains) + return; + + /* decrement count of domains */ + node->num_domains--; + + /* shift entries following specified entry up by one */ + if (entry < node->num_domains) { + xenstat_domain *domain = &node->domains[entry]; + memmove(domain,domain+1,node->num_domains-entry); + } + + /* zero out original last entry from node -- not + strictly necessary but safer! 
*/ + memset(&node->domains[node->num_domains], 0, sizeof(xenstat_domain)); +} + diff -r bb510c274af8 -r 5f92043a3ab1 tools/xm-test/lib/XmTestLib/XenDevice.py --- a/tools/xm-test/lib/XmTestLib/XenDevice.py Fri Aug 11 13:30:48 2006 -0400 +++ b/tools/xm-test/lib/XmTestLib/XenDevice.py Sun Aug 13 12:00:38 2006 -0400 @@ -265,6 +265,7 @@ class XenNetDevice(XenDevice): self.ip = xmtest_netconf.getIP(self.domain.getName(), self.id) self.addIfconfigCmd() + self.config["ip"] = str(self.ip) # Setup an alias for Dom0 self.dom0_alias_ip = xmtest_netconf.getIP("domain0", self.domain.getName()) diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/Rules.mk --- a/xen/arch/ia64/Rules.mk Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/Rules.mk Sun Aug 13 12:00:38 2006 -0400 @@ -3,7 +3,6 @@ HAS_ACPI := y VALIDATE_VT ?= n -xen_ia64_dom0_virtual_physical ?= y no_warns ?= n ifneq ($(COMPILE_ARCH),$(TARGET_ARCH)) @@ -36,9 +35,6 @@ ifeq ($(VALIDATE_VT),y) ifeq ($(VALIDATE_VT),y) CFLAGS += -DVALIDATE_VT endif -ifeq ($(xen_ia64_dom0_virtual_physical),y) -CFLAGS += -DCONFIG_XEN_IA64_DOM0_VP -endif ifeq ($(no_warns),y) CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized endif diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/asm-offsets.c --- a/xen/arch/ia64/asm-offsets.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/asm-offsets.c Sun Aug 13 12:00:38 2006 -0400 @@ -32,6 +32,7 @@ void foo(void) DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64)); DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info)); DEFINE(SHARED_INFO_SIZE, sizeof (struct shared_info)); + DEFINE(MAPPED_REGS_T_SIZE, sizeof (mapped_regs_t)); BLANK(); DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET, offsetof (struct ia64_mca_cpu, init_stack)); @@ -210,4 +211,11 @@ void foo(void) DEFINE(IA64_KR_IO_BASE_OFFSET, offsetof (cpu_kr_ia64_t, _kr[IA64_KR_IO_BASE])); DEFINE(IA64_KR_CURRENT_STACK_OFFSET, offsetof (cpu_kr_ia64_t, _kr[IA64_KR_CURRENT_STACK])); +#ifdef PERF_COUNTERS + BLANK(); + DEFINE(RECOVER_TO_PAGE_FAULT_PERFC_OFS, offsetof (struct perfcounter, recover_to_page_fault)); + DEFINE(RECOVER_TO_BREAK_FAULT_PERFC_OFS, offsetof (struct perfcounter, recover_to_break_fault)); + DEFINE(FAST_HYPERPRIVOP_PERFC_OFS, offsetof (struct perfcounter, fast_hyperprivop)); + DEFINE(FAST_REFLECT_PERFC_OFS, offsetof (struct perfcounter, fast_reflect)); +#endif } diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/vmx/mmio.c --- a/xen/arch/ia64/vmx/mmio.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/vmx/mmio.c Sun Aug 13 12:00:38 2006 -0400 @@ -433,7 +433,10 @@ void emulate_io_inst(VCPU *vcpu, u64 pad u64 data, value,post_update, slot1a, slot1b, temp; INST64 inst; regs=vcpu_regs(vcpu); - bundle = __vmx_get_domain_bundle(regs->cr_iip); + if (IA64_RETRY == __vmx_get_domain_bundle(regs->cr_iip, &bundle)) { + /* if fetch code fail, return and try again */ + return; + } slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; if (!slot) inst.inst = bundle.slot0; else if (slot == 1){ @@ -494,6 +497,21 @@ void emulate_io_inst(VCPU *vcpu, u64 pad vcpu_set_gr(vcpu,inst.M3.r3,temp,0); } + } + // Floating-point spill + else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B && + inst.M9.m == 0 && inst.M9.x == 0) { + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + vcpu_get_fpreg(vcpu, inst.M9.f2, &v); + /* Write high word. + FIXME: this is a kludge! 
*/ + v.u.bits[1] &= 0x3ffff; + mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); + data = v.u.bits[0]; + size = 3; } // Floating-point spill + Imm update else if(inst.M10.major==7&&inst.M10.x6==0x3B){ diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/vmx/vmmu.c --- a/xen/arch/ia64/vmx/vmmu.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/vmx/vmmu.c Sun Aug 13 12:00:38 2006 -0400 @@ -305,18 +305,18 @@ int unimplemented_gva(VCPU *vcpu,u64 vad /* - * Prefetch guest bundle code. + * Fetch guest bundle code. * INPUT: - * code: buffer pointer to hold the read data. - * num: number of dword (8byts) to read. - */ -int -fetch_code(VCPU *vcpu, u64 gip, u64 *code1, u64 *code2) + * gip: guest ip + * pbundle: used to return fetched bundle. + */ +unsigned long +fetch_code(VCPU *vcpu, u64 gip, IA64_BUNDLE *pbundle) { u64 gpip=0; // guest physical IP u64 *vpa; thash_data_t *tlb; - u64 mfn; + u64 mfn, maddr; struct page_info* page; again: @@ -333,11 +333,16 @@ fetch_code(VCPU *vcpu, u64 gip, u64 *cod if( gpip){ mfn = gmfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT); if( mfn == INVALID_MFN ) panic_domain(vcpu_regs(vcpu),"fetch_code: invalid memory\n"); + maddr = (mfn << PAGE_SHIFT) | (gpip & (PAGE_SIZE - 1)); }else{ tlb = vhpt_lookup(gip); - if( tlb == NULL) - panic_domain(vcpu_regs(vcpu),"No entry found in ITLB and DTLB\n"); + if (tlb == NULL) { + ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2); + return IA64_RETRY; + } mfn = tlb->ppn >> (PAGE_SHIFT - ARCH_PAGE_SHIFT); + maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) | + (gip & (PSIZE(tlb->ps) - 1)); } page = mfn_to_page(mfn); @@ -349,12 +354,12 @@ fetch_code(VCPU *vcpu, u64 gip, u64 *cod } goto again; } - vpa = (u64 *)__va((mfn << PAGE_SHIFT) | (gip & (PAGE_SIZE - 1))); - - *code1 = *vpa++; - *code2 = *vpa; + vpa = (u64 *)__va(maddr); + + pbundle->i64[0] = *vpa++; + pbundle->i64[1] = *vpa; put_page(page); - return 1; + return IA64_NO_FAULT; } IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) @@ -371,7 +376,8 @@ IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UIN return IA64_FAULT; } #endif //VTLB_DEBUG - thash_purge_and_insert(vcpu, pte, itir, ifa); + pte &= ~PAGE_FLAGS_RV_MASK; + thash_purge_and_insert(vcpu, pte, itir, ifa, ISIDE_TLB); return IA64_NO_FAULT; } @@ -390,10 +396,11 @@ IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UIN return IA64_FAULT; } #endif //VTLB_DEBUG + pte &= ~PAGE_FLAGS_RV_MASK; gpfn = (pte & _PAGE_PPN_MASK)>> PAGE_SHIFT; if (VMX_DOMAIN(vcpu) && __gpfn_is_io(vcpu->domain, gpfn)) pte |= VTLB_PTE_IO; - thash_purge_and_insert(vcpu, pte, itir, ifa); + thash_purge_and_insert(vcpu, pte, itir, ifa, DSIDE_TLB); return IA64_NO_FAULT; } @@ -418,7 +425,8 @@ IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, u64 return IA64_FAULT; } thash_purge_entries(vcpu, va, ps); -#endif +#endif + pte &= ~PAGE_FLAGS_RV_MASK; vcpu_get_rr(vcpu, va, &rid); rid = rid& RR_RID_MASK; p_itr = (thash_data_t *)&vcpu->arch.itrs[slot]; @@ -432,8 +440,8 @@ IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, u64 { #ifdef VTLB_DEBUG int index; +#endif u64 gpfn; -#endif u64 ps, va, rid; thash_data_t * p_dtr; ps = itir_ps(itir); @@ -445,11 +453,12 @@ IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, u64 panic_domain(vcpu_regs(vcpu),"Tlb conflict!!"); return IA64_FAULT; } +#endif + pte &= ~PAGE_FLAGS_RV_MASK; thash_purge_entries(vcpu, va, ps); gpfn = (pte & _PAGE_PPN_MASK)>> PAGE_SHIFT; - if(VMX_DOMAIN(vcpu) && _gpfn_is_io(vcpu->domain,gpfn)) + if (VMX_DOMAIN(vcpu) && __gpfn_is_io(vcpu->domain, gpfn)) pte |= VTLB_PTE_IO; -#endif vcpu_get_rr(vcpu, va, &rid); rid = rid& RR_RID_MASK; p_dtr = (thash_data_t 
*)&vcpu->arch.dtrs[slot]; @@ -521,19 +530,23 @@ struct ptc_ga_args { static void ptc_ga_remote_func (void *varg) { - u64 oldrid, moldrid, mpta; + u64 oldrid, moldrid, mpta, oldpsbits, vadr; struct ptc_ga_args *args = (struct ptc_ga_args *)varg; VCPU *v = args->vcpu; + vadr = args->vadr; oldrid = VMX(v, vrr[0]); VMX(v, vrr[0]) = args->rid; + oldpsbits = VMX(v, psbits[0]); + VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vadr)]); moldrid = ia64_get_rr(0x0); ia64_set_rr(0x0,vrrtomrr(v,args->rid)); mpta = ia64_get_pta(); ia64_set_pta(v->arch.arch_vmx.mpta&(~1)); ia64_srlz_d(); - vmx_vcpu_ptc_l(v, args->vadr, args->ps); + vmx_vcpu_ptc_l(v, REGION_OFFSET(vadr), args->ps); VMX(v, vrr[0]) = oldrid; + VMX(v, psbits[0]) = oldpsbits; ia64_set_rr(0x0,moldrid); ia64_set_pta(mpta); ia64_dv_serialize_data(); @@ -547,7 +560,7 @@ IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UIN struct vcpu *v; struct ptc_ga_args args; - args.vadr = va<<3>>3; + args.vadr = va; vcpu_get_rr(vcpu, va, &args.rid); args.ps = ps; for_each_vcpu (d, v) { diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/vmx/vmx_entry.S --- a/xen/arch/ia64/vmx/vmx_entry.S Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_entry.S Sun Aug 13 12:00:38 2006 -0400 @@ -197,6 +197,11 @@ GLOBAL_ENTRY(ia64_leave_hypervisor) ;; mov ar.pfs=loc0 adds r20=PT(PR)+16,r12 + adds r8=PT(EML_UNAT)+16,r12 + ;; + ld8 r8=[r8] + ;; + mov ar.unat=r8 ;; lfetch [r20],PT(CR_IPSR)-PT(PR) adds r2 = PT(B6)+16,r12 diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/vmx/vmx_hypercall.c --- a/xen/arch/ia64/vmx/vmx_hypercall.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_hypercall.c Sun Aug 13 12:00:38 2006 -0400 @@ -35,180 +35,50 @@ #include <asm/dom_fw.h> #include <xen/domain.h> -extern long do_sched_op_compat(int cmd, unsigned long arg); +long +do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) +{ + long rc = 0; -void hyper_not_support(void) -{ - VCPU *vcpu=current; - vcpu_set_gr(vcpu, 8, -1, 0); - vmx_vcpu_increment_iip(vcpu); + switch (op) { + case HVMOP_set_param: + case HVMOP_get_param: + { + struct xen_hvm_param a; + struct domain *d; + + if (copy_from_guest(&a, arg, 1)) + return -EFAULT; + + if (a.index > HVM_NR_PARAMS) + return -EINVAL; + + if (a.domid == DOMID_SELF) { + get_knownalive_domain(current->domain); + d = current->domain; + } + else if (IS_PRIV(current->domain)) { + d = find_domain_by_id(a.domid); + if (!d) + return -ESRCH; + } + else + return -EPERM; + + if (op == HVMOP_set_param) { + rc = 0; + d->arch.hvm_domain.params[a.index] = a.value; + } + else + rc = d->arch.hvm_domain.params[a.index]; + + put_domain(d); + return rc; + } + + default: + DPRINTK("Bad HVM op %ld.\n", op); + rc = -ENOSYS; + } + return rc; } - -void hyper_mmu_update(void) -{ - VCPU *vcpu=current; - u64 r32,r33,r34,r35,ret; - vcpu_get_gr_nat(vcpu,16,&r32); - vcpu_get_gr_nat(vcpu,17,&r33); - vcpu_get_gr_nat(vcpu,18,&r34); - vcpu_get_gr_nat(vcpu,19,&r35); - ret=vmx_do_mmu_update((mmu_update_t*)r32,r33,(u64 *)r34,r35); - vcpu_set_gr(vcpu, 8, ret, 0); - vmx_vcpu_increment_iip(vcpu); -} - -void hyper_dom_mem_op(void) -{ - VCPU *vcpu=current; - u64 r32,r33,r34,r35,r36; - u64 ret; - vcpu_get_gr_nat(vcpu,16,&r32); - vcpu_get_gr_nat(vcpu,17,&r33); - vcpu_get_gr_nat(vcpu,18,&r34); - vcpu_get_gr_nat(vcpu,19,&r35); - vcpu_get_gr_nat(vcpu,20,&r36); -// ret=do_dom_mem_op(r32,(u64 *)r33,r34,r35,r36); - ret = 0; - printf("do_dom_mem return value: %lx\n", ret); - vcpu_set_gr(vcpu, 8, ret, 0); - - /* Hard to define a special return value to indicate hypercall restart. 
- * So just add a new mark, which is SMP safe - */ - if (vcpu->arch.hypercall_continuation == 1) - vcpu->arch.hypercall_continuation = 0; - else - vmx_vcpu_increment_iip(vcpu); -} - - -void hyper_sched_op_compat(void) -{ - VCPU *vcpu=current; - u64 r32,r33,ret; - vcpu_get_gr_nat(vcpu,16,&r32); - vcpu_get_gr_nat(vcpu,17,&r33); - ret=do_sched_op_compat(r32,r33); - vcpu_set_gr(vcpu, 8, ret, 0); - - vmx_vcpu_increment_iip(vcpu); -} - -void hyper_dom0_op(void) -{ - VCPU *vcpu=current; - u64 r32,ret; - vcpu_get_gr_nat(vcpu,16,&r32); - ret=do_dom0_op(guest_handle_from_ptr(r32, dom0_op_t)); - vcpu_set_gr(vcpu, 8, ret, 0); - - vmx_vcpu_increment_iip(vcpu); -} - -void hyper_event_channel_op_compat(void) -{ - VCPU *vcpu=current; - u64 r32,ret; - vcpu_get_gr_nat(vcpu,16,&r32); - ret=do_event_channel_op_compat(guest_handle_from_ptr(r32, evtchn_op_t)); - vcpu_set_gr(vcpu, 8, ret, 0); - vmx_vcpu_increment_iip(vcpu); -} - -void hyper_xen_version(void) -{ - VCPU *vcpu=current; - u64 r32,r33,ret; - vcpu_get_gr_nat(vcpu,16,&r32); - vcpu_get_gr_nat(vcpu,17,&r33); - ret=do_xen_version((int )r32,guest_handle_from_ptr(r33, void)); - vcpu_set_gr(vcpu, 8, ret, 0); - vmx_vcpu_increment_iip(vcpu); -} -/* -static int do_lock_page(VCPU *vcpu, u64 va, u64 lock) -{ - ia64_rr rr; - thash_cb_t *hcb; - hcb = vmx_vcpu_get_vtlb(vcpu); - rr = vmx_vcpu_rr(vcpu, va); - return thash_lock_tc(hcb, va ,1U<<rr.ps, rr.rid, DSIDE_TLB, lock); -} - */ -/* - * Lock guest page in vTLB, so that it's not relinquished by recycle - * session when HV is servicing that hypercall. - */ - -/* -void hyper_lock_page(void) -{ -//TODO: - VCPU *vcpu=current; - u64 va,lock, ret; - vcpu_get_gr_nat(vcpu,16,&va); - vcpu_get_gr_nat(vcpu,17,&lock); - ret=do_lock_page(vcpu, va, lock); - vcpu_set_gr(vcpu, 8, ret, 0); - - vmx_vcpu_increment_iip(vcpu); -} - */ - -static int do_set_shared_page(VCPU *vcpu, u64 gpa) -{ - u64 o_info; - struct domain *d = vcpu->domain; - struct vcpu *v; - struct page_info *page; - if(vcpu->domain!=dom0) - return -EPERM; - o_info = (u64)vcpu->domain->shared_info; - again: - d->shared_info= (shared_info_t *)domain_mpa_to_imva(vcpu->domain, gpa); - page = virt_to_page(d->shared_info); - if (get_page(page, d) == 0) - goto again; - - /* Copy existing shared info into new page */ - if (o_info) { - memcpy((void*)d->shared_info, (void*)o_info, PAGE_SIZE); - for_each_vcpu(d, v) { - v->vcpu_info = &d->shared_info->vcpu_info[v->vcpu_id]; - } - /* If original page belongs to xen heap, then relinguish back - * to xen heap. Or else, leave to domain itself to decide. 
- */ - if (likely(IS_XEN_HEAP_FRAME(virt_to_page(o_info)))) - free_xenheap_page((void *)o_info); - } else - memset(d->shared_info, 0, PAGE_SIZE); - put_page(page); - return 0; -} - -void hyper_set_shared_page(void) -{ - VCPU *vcpu=current; - u64 gpa,ret; - vcpu_get_gr_nat(vcpu,16,&gpa); - - ret=do_set_shared_page(vcpu, gpa); - vcpu_set_gr(vcpu, 8, ret, 0); - - vmx_vcpu_increment_iip(vcpu); -} - -/* -void hyper_grant_table_op(void) -{ - VCPU *vcpu=current; - u64 r32,r33,r34,ret; - vcpu_get_gr_nat(vcpu,16,&r32); - vcpu_get_gr_nat(vcpu,17,&r33); - vcpu_get_gr_nat(vcpu,18,&r34); - - ret=do_grant_table_op((unsigned int)r32, (void *)r33, (unsigned int)r34); - vcpu_set_gr(vcpu, 8, ret, 0); -} -*/ diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/vmx/vmx_ivt.S --- a/xen/arch/ia64/vmx/vmx_ivt.S Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_ivt.S Sun Aug 13 12:00:38 2006 -0400 @@ -423,7 +423,6 @@ ENTRY(vmx_break_fault) mov r31=pr mov r19=11 mov r30=cr.iim - movl r29=0x1100 ;; #ifdef VTI_DEBUG // break 0 is already handled in vmx_ia64_handle_break. @@ -431,9 +430,7 @@ ENTRY(vmx_break_fault) (p6) br.sptk vmx_fault_11 ;; #endif - cmp.eq p6,p7=r29,r30 - (p6) br.dptk.few vmx_hypercall_dispatch - (p7) br.sptk.many vmx_dispatch_break_fault + br.sptk.many vmx_dispatch_break_fault ;; VMX_FAULT(11); END(vmx_break_fault) @@ -1140,33 +1137,6 @@ END(vmx_dispatch_break_fault) END(vmx_dispatch_break_fault) -ENTRY(vmx_hypercall_dispatch) - VMX_SAVE_MIN_WITH_COVER - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - VMX_SAVE_REST - ;; - movl r14=ia64_leave_hypervisor - movl r2=hyper_call_table - ;; - mov rp=r14 - shladd r2=r15,3,r2 - ;; - ld8 r2=[r2] - ;; - mov b6=r2 - ;; - br.call.sptk.many b6=b6 - ;; -END(vmx_hypercall_dispatch) - - - ENTRY(vmx_dispatch_interrupt) VMX_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 ;; @@ -1187,39 +1157,3 @@ ENTRY(vmx_dispatch_interrupt) add out1=16,sp // pass pointer to pt_regs as second arg br.call.sptk.many b6=ia64_handle_irq END(vmx_dispatch_interrupt) - - - - .rodata - .align 8 - .globl hyper_call_table -hyper_call_table: - data8 hyper_not_support //hyper_set_trap_table /* 0 */ - data8 hyper_mmu_update - data8 hyper_not_support //hyper_set_gdt - data8 hyper_not_support //hyper_stack_switch - data8 hyper_not_support //hyper_set_callbacks - data8 hyper_not_support //hyper_fpu_taskswitch /* 5 */ - data8 hyper_sched_op_compat - data8 hyper_dom0_op - data8 hyper_not_support //hyper_set_debugreg - data8 hyper_not_support //hyper_get_debugreg - data8 hyper_not_support //hyper_update_descriptor /* 10 */ - data8 hyper_not_support //hyper_set_fast_trap - data8 hyper_dom_mem_op - data8 hyper_not_support //hyper_multicall - data8 hyper_not_support //hyper_update_va_mapping - data8 hyper_not_support //hyper_set_timer_op /* 15 */ - data8 hyper_event_channel_op_compat - data8 hyper_xen_version - data8 hyper_not_support //hyper_console_io - data8 hyper_not_support //hyper_physdev_op - data8 hyper_not_support //hyper_grant_table_op /* 20 */ - data8 hyper_not_support //hyper_vm_assist - data8 hyper_not_support //hyper_update_va_mapping_otherdomain - data8 hyper_not_support //hyper_switch_vm86 - data8 hyper_not_support //hyper_boot_vcpu - data8 hyper_not_support //hyper_ni_hypercall /* 25 */ - data8 hyper_not_support //hyper_mmuext_op - data8 hyper_not_support //tata8 hyper_lock_page - data8 hyper_set_shared_page diff -r bb510c274af8 -r 5f92043a3ab1 
xen/arch/ia64/vmx/vmx_minstate.h --- a/xen/arch/ia64/vmx/vmx_minstate.h Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_minstate.h Sun Aug 13 12:00:38 2006 -0400 @@ -269,7 +269,12 @@ ;; \ st8 [r24]=r9; /* ar.csd */ \ st8 [r25]=r10; /* ar.ssd */ \ - ;; + ;; \ + mov r18=ar.unat; \ + adds r19=PT(EML_UNAT)-PT(R4),r2; \ + ;; \ + st8 [r19]=r18; /* eml_unat */ \ + #define VMX_SAVE_EXTRA \ .mem.offset 0,0; st8.spill [r2]=r4,16; \ diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/vmx/vmx_phy_mode.c --- a/xen/arch/ia64/vmx/vmx_phy_mode.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_phy_mode.c Sun Aug 13 12:00:38 2006 -0400 @@ -110,9 +110,14 @@ physical_tlb_miss(VCPU *vcpu, u64 vadr) physical_tlb_miss(VCPU *vcpu, u64 vadr) { u64 pte; + ia64_rr rr; + rr.rrval = ia64_get_rr(vadr); pte = vadr& _PAGE_PPN_MASK; - pte = pte | PHY_PAGE_WB; - thash_purge_and_insert(vcpu, pte, (PAGE_SHIFT<<2), vadr); + if (vadr >> 63) + pte = pte | PHY_PAGE_UC; + else + pte = pte | PHY_PAGE_WB; + thash_vhpt_insert(vcpu, pte, (rr.ps << 2), vadr); return; } @@ -120,19 +125,14 @@ void void vmx_init_all_rr(VCPU *vcpu) { - VMX(vcpu,vrr[VRN0]) = 0x38; - VMX(vcpu,vrr[VRN1]) = 0x138; - VMX(vcpu,vrr[VRN2]) = 0x238; - VMX(vcpu,vrr[VRN3]) = 0x338; - VMX(vcpu,vrr[VRN4]) = 0x438; - VMX(vcpu,vrr[VRN5]) = 0x538; - VMX(vcpu,vrr[VRN6]) = 0x660; - VMX(vcpu,vrr[VRN7]) = 0x760; -#if 0 - VMX(vcpu,mrr5) = vrrtomrr(vcpu, 0x38); - VMX(vcpu,mrr6) = vrrtomrr(vcpu, 0x60); - VMX(vcpu,mrr7) = vrrtomrr(vcpu, 0x60); -#endif + VMX(vcpu, vrr[VRN0]) = 0x38; + VMX(vcpu, vrr[VRN1]) = 0x38; + VMX(vcpu, vrr[VRN2]) = 0x38; + VMX(vcpu, vrr[VRN3]) = 0x38; + VMX(vcpu, vrr[VRN4]) = 0x38; + VMX(vcpu, vrr[VRN5]) = 0x38; + VMX(vcpu, vrr[VRN6]) = 0x38; + VMX(vcpu, vrr[VRN7]) = 0x738; } extern void * pal_vaddr; @@ -208,18 +208,19 @@ switch_to_physical_rid(VCPU *vcpu) switch_to_physical_rid(VCPU *vcpu) { UINT64 psr; - ia64_rr phy_rr; - + ia64_rr phy_rr, mrr; /* Save original virtual mode rr[0] and rr[4] */ psr=ia64_clear_ic(); phy_rr.rrval = vcpu->domain->arch.metaphysical_rr0; -// phy_rr.ps = EMUL_PHY_PAGE_SHIFT; + mrr.rrval = ia64_get_rr(VRN0 << VRN_SHIFT); + phy_rr.ps = mrr.ps; phy_rr.ve = 1; ia64_set_rr(VRN0<<VRN_SHIFT, phy_rr.rrval); ia64_srlz_d(); phy_rr.rrval = vcpu->domain->arch.metaphysical_rr4; -// phy_rr.ps = EMUL_PHY_PAGE_SHIFT; + mrr.rrval = ia64_get_rr(VRN4 << VRN_SHIFT); + phy_rr.ps = mrr.ps; phy_rr.ve = 1; ia64_set_rr(VRN4<<VRN_SHIFT, phy_rr.rrval); ia64_srlz_d(); @@ -262,6 +263,8 @@ switch_mm_mode(VCPU *vcpu, IA64_PSR old_ act = mm_switch_action(old_psr, new_psr); switch (act) { case SW_V2P: +// printf("V -> P mode transition: (0x%lx -> 0x%lx)\n", +// old_psr.val, new_psr.val); vcpu->arch.old_rsc = regs->ar_rsc; switch_to_physical_rid(vcpu); /* @@ -272,6 +275,8 @@ switch_mm_mode(VCPU *vcpu, IA64_PSR old_ vcpu->arch.mode_flags |= GUEST_IN_PHY; break; case SW_P2V: +// printf("P -> V mode transition: (0x%lx -> 0x%lx)\n", +// old_psr.val, new_psr.val); switch_to_virtual_rid(vcpu); /* * recover old mode which is saved when entering @@ -285,8 +290,8 @@ switch_mm_mode(VCPU *vcpu, IA64_PSR old_ old_psr.val); break; case SW_NOP: - printf("No action required for mode transition: (0x%lx -> 0x%lx)\n", - old_psr.val, new_psr.val); +// printf("No action required for mode transition: (0x%lx -> 0x%lx)\n", +// old_psr.val, new_psr.val); break; default: /* Sanity check */ diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/vmx/vmx_process.c --- a/xen/arch/ia64/vmx/vmx_process.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_process.c Sun 
Aug 13 12:00:38 2006 -0400 @@ -273,21 +273,24 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r // prepare_if_physical_mode(v); if((data=vtlb_lookup(v, vadr,type))!=0){ -// gppa = (vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps); -// if(v->domain!=dom0&&type==DSIDE_TLB && __gpfn_is_io(v->domain,gppa>>PAGE_SHIFT)){ - if(v->domain!=dom0 && data->io && type==DSIDE_TLB ){ - if(data->pl >= ((regs->cr_ipsr>>IA64_PSR_CPL0_BIT)&3)){ - gppa = (vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps); - emulate_io_inst(v, gppa, data->ma); - }else{ - vcpu_set_isr(v,misr.val); - data_access_rights(v, vadr); - } - return IA64_FAULT; - } - + if (v->domain != dom0 && type == DSIDE_TLB) { + gppa = (vadr & ((1UL << data->ps) - 1)) + + (data->ppn >> (data->ps - 12) << data->ps); + if (__gpfn_is_io(v->domain, gppa >> PAGE_SHIFT)) { + if (data->pl >= ((regs->cr_ipsr >> IA64_PSR_CPL0_BIT) & 3)) + emulate_io_inst(v, gppa, data->ma); + else { + vcpu_set_isr(v, misr.val); + data_access_rights(v, vadr); + } + return IA64_FAULT; + } + } thash_vhpt_insert(v,data->page_flags, data->itir ,vadr); + }else if(type == DSIDE_TLB){ + if (misr.sp) + return vmx_handle_lds(regs); if(!vhpt_enabled(v, vadr, misr.rs?RSE_REF:DATA_REF)){ if(vpsr.ic){ vcpu_set_isr(v, misr.val); @@ -306,10 +309,11 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r } else{ vmx_vcpu_thash(v, vadr, &vhpt_adr); if(!guest_vhpt_lookup(vhpt_adr, &pteval)){ - if (pteval & _PAGE_P){ + if ((pteval & _PAGE_P) && + ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST)) { vcpu_get_rr(v, vadr, &rr); itir = rr&(RR_RID_MASK | RR_PS_MASK); - thash_purge_and_insert(v, pteval, itir , vadr); + thash_purge_and_insert(v, pteval, itir, vadr, DSIDE_TLB); return IA64_NO_FAULT; } if(vpsr.ic){ @@ -357,7 +361,7 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r if (pteval & _PAGE_P){ vcpu_get_rr(v, vadr, &rr); itir = rr&(RR_RID_MASK | RR_PS_MASK); - thash_purge_and_insert(v, pteval, itir , vadr); + thash_purge_and_insert(v, pteval, itir, vadr, ISIDE_TLB); return IA64_NO_FAULT; } if(!vpsr.ic){ diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/vmx/vmx_vcpu.c --- a/xen/arch/ia64/vmx/vmx_vcpu.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_vcpu.c Sun Aug 13 12:00:38 2006 -0400 @@ -202,9 +202,7 @@ IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UI newrr.rrval=val; if (newrr.rid >= (1 << vcpu->domain->arch.rid_bits)) panic_domain (NULL, "use of invalid rid %x\n", newrr.rid); - if(oldrr.ps!=newrr.ps){ - thash_purge_all(vcpu); - } + VMX(vcpu,vrr[reg>>61]) = val; switch((u64)(reg>>61)) { case VRN7: @@ -272,7 +270,10 @@ IA64FAULT vmx_vcpu_rfi(VCPU *vcpu) UINT64 ifs, psr; REGS *regs = vcpu_regs(vcpu); psr = VCPU(vcpu,ipsr); - vcpu_bsw1(vcpu); + if (psr & IA64_PSR_BN) + vcpu_bsw1(vcpu); + else + vcpu_bsw0(vcpu); vmx_vcpu_set_psr(vcpu,psr); ifs=VCPU(vcpu,ifs); if(ifs>>63) diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/vmx/vmx_virt.c --- a/xen/arch/ia64/vmx/vmx_virt.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_virt.c Sun Aug 13 12:00:38 2006 -0400 @@ -1334,11 +1334,10 @@ IA64FAULT vmx_emul_mov_from_cr(VCPU *vcp //#define BYPASS_VMAL_OPCODE extern IA64_SLOT_TYPE slot_types[0x20][3]; -IA64_BUNDLE __vmx_get_domain_bundle(u64 iip) -{ - IA64_BUNDLE bundle; - fetch_code( current, iip, &bundle.i64[0], &bundle.i64[1]); - return bundle; +unsigned long +__vmx_get_domain_bundle(u64 iip, IA64_BUNDLE *pbundle) +{ + return fetch_code(current, iip, pbundle); } /** Emulate a privileged operation. 
diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/vmx/vtlb.c --- a/xen/arch/ia64/vmx/vtlb.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/vmx/vtlb.c Sun Aug 13 12:00:38 2006 -0400 @@ -141,14 +141,18 @@ static void thash_recycle_cch(thash_cb_t static void vmx_vhpt_insert(thash_cb_t *hcb, u64 pte, u64 itir, u64 ifa) { - u64 tag; + u64 tag ,len; + ia64_rr rr; thash_data_t *head, *cch; pte = pte & ~PAGE_FLAGS_RV_MASK; - + rr.rrval = ia64_get_rr(ifa); head = (thash_data_t *)ia64_thash(ifa); tag = ia64_ttag(ifa); if( INVALID_VHPT(head) ) { + len = head->len; head->page_flags = pte; + head->len = len; + head->itir = rr.ps << 2; head->etag = tag; return; } @@ -160,10 +164,9 @@ static void vmx_vhpt_insert(thash_cb_t * else{ cch = __alloc_chain(hcb); } - cch->page_flags=head->page_flags; - cch->etag=head->etag; - cch->next=head->next; + *cch = *head; head->page_flags=pte; + head->itir = rr.ps << 2; head->etag=tag; head->next = cch; head->len = cch->len+1; @@ -210,7 +213,13 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte) u64 guest_vhpt_lookup(u64 iha, u64 *pte) { u64 ret; - vhpt_lookup(iha); + thash_data_t * data; + data = vhpt_lookup(iha); + if (data == NULL) { + data = vtlb_lookup(current, iha, DSIDE_TLB); + if (data != NULL) + thash_vhpt_insert(current, data->page_flags, data->itir ,iha); + } asm volatile ("rsm psr.ic|psr.i;;" "srlz.d;;" "ld8.s r9=[%1];;" @@ -231,40 +240,33 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte) * purge software guest tlb */ -static void vtlb_purge(VCPU *v, u64 va, u64 ps) -{ +void vtlb_purge(VCPU *v, u64 va, u64 ps) +{ + thash_data_t *cur; + u64 start, end, curadr, size, psbits, tag, def_size; + ia64_rr vrr; thash_cb_t *hcb = &v->arch.vtlb; - thash_data_t *hash_table, *prev, *next; - u64 start, end, size, tag, rid, def_size; - ia64_rr vrr; vcpu_get_rr(v, va, &vrr.rrval); - rid = vrr.rid; + psbits = VMX(v, psbits[(va >> 61)]); size = PSIZE(ps); start = va & (-size); end = start + size; - def_size = PSIZE(vrr.ps); - while(start < end){ - hash_table = vsa_thash(hcb->pta, start, vrr.rrval, &tag); - if(!INVALID_TLB(hash_table)){ - if(hash_table->etag == tag){ - hash_table->etag = 1UL<<63; - } - else{ - prev=hash_table; - next=prev->next; - while(next){ - if(next->etag == tag){ - next->etag = 1UL<<63; - break; - } - prev=next; - next=next->next; - } - } - } - start += def_size; - } -// machine_tlb_purge(va, ps); + while (psbits) { + curadr = start; + ps = __ffs(psbits); + psbits &= ~(1UL << ps); + def_size = PSIZE(ps); + vrr.ps = ps; + while (curadr < end) { + cur = vsa_thash(hcb->pta, curadr, vrr.rrval, &tag); + while (cur) { + if (cur->etag == tag && cur->ps == ps) + cur->etag = 1UL << 63; + cur = cur->next; + } + curadr += def_size; + } + } } @@ -274,30 +276,23 @@ static void vhpt_purge(VCPU *v, u64 va, static void vhpt_purge(VCPU *v, u64 va, u64 ps) { //thash_cb_t *hcb = &v->arch.vhpt; - thash_data_t *hash_table, *prev, *next; + thash_data_t *cur; u64 start, end, size, tag; + ia64_rr rr; size = PSIZE(ps); start = va & (-size); end = start + size; + rr.rrval = ia64_get_rr(va); + size = PSIZE(rr.ps); while(start < end){ - hash_table = (thash_data_t *)ia64_thash(start); + cur = (thash_data_t *)ia64_thash(start); tag = ia64_ttag(start); - if(hash_table->etag == tag ){ - hash_table->etag = 1UL<<63; - } - else{ - prev=hash_table; - next=prev->next; - while(next){ - if(next->etag == tag){ - next->etag = 1UL<<63; - break; - } - prev=next; - next=next->next; - } - } - start += PAGE_SIZE; + while (cur) { + if (cur->etag == tag) + cur->etag = 1UL << 63; + cur = cur->next; + } + start 
+= size; } machine_tlb_purge(va, ps); } @@ -343,14 +338,15 @@ thash_data_t *__alloc_chain(thash_cb_t * * 3: The caller need to make sure the new entry will not overlap * with any existed entry. */ -void vtlb_insert(thash_cb_t *hcb, u64 pte, u64 itir, u64 va) -{ - thash_data_t *hash_table, *cch; +void vtlb_insert(VCPU *v, u64 pte, u64 itir, u64 va) +{ + thash_data_t *hash_table, *cch; /* int flag; */ ia64_rr vrr; /* u64 gppn, ppns, ppne; */ - u64 tag; - vcpu_get_rr(current, va, &vrr.rrval); + u64 tag, len; + thash_cb_t *hcb = &v->arch.vtlb; + vcpu_get_rr(v, va, &vrr.rrval); #ifdef VTLB_DEBUG if (vrr.ps != itir_ps(itir)) { // machine_tlb_insert(hcb->vcpu, entry); @@ -359,9 +355,13 @@ void vtlb_insert(thash_cb_t *hcb, u64 pt return; } #endif + vrr.ps = itir_ps(itir); + VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps); hash_table = vsa_thash(hcb->pta, va, vrr.rrval, &tag); if( INVALID_TLB(hash_table) ) { + len = hash_table->len; hash_table->page_flags = pte; + hash_table->len = len; hash_table->itir=itir; hash_table->etag=tag; return; @@ -425,18 +425,23 @@ void thash_purge_entries(VCPU *v, u64 va u64 translate_phy_pte(VCPU *v, u64 *pte, u64 itir, u64 va) { - u64 ps, addr; + u64 ps, ps_mask, paddr, maddr; +// ia64_rr rr; union pte_flags phy_pte; ps = itir_ps(itir); + ps_mask = ~((1UL << ps) - 1); phy_pte.val = *pte; - addr = *pte; - addr = ((addr & _PAGE_PPN_MASK)>>ps<<ps)|(va&((1UL<<ps)-1)); - addr = lookup_domain_mpa(v->domain, addr, NULL); - if(addr & GPFN_IO_MASK){ + paddr = *pte; + paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); + maddr = lookup_domain_mpa(v->domain, paddr, NULL); + if (maddr & GPFN_IO_MASK) { *pte |= VTLB_PTE_IO; return -1; } - phy_pte.ppn = addr >> ARCH_PAGE_SHIFT; +// rr.rrval = ia64_get_rr(va); +// ps = rr.ps; + maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | (paddr & ~PAGE_MASK); + phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT; return phy_pte.val; } @@ -445,12 +450,18 @@ u64 translate_phy_pte(VCPU *v, u64 *pte, * Purge overlap TCs and then insert the new entry to emulate itc ops. * Notes: Only TC entry can purge and insert. 
*/ -void thash_purge_and_insert(VCPU *v, u64 pte, u64 itir, u64 ifa) +void thash_purge_and_insert(VCPU *v, u64 pte, u64 itir, u64 ifa, int type) { u64 ps;//, va; u64 phy_pte; + ia64_rr vrr, mrr; ps = itir_ps(itir); - + vcpu_get_rr(current, ifa, &vrr.rrval); + mrr.rrval = ia64_get_rr(ifa); +// if (vrr.ps != itir_ps(itir)) { +// printf("not preferred ps with va: 0x%lx vrr.ps=%d ps=%ld\n", +// ifa, vrr.ps, itir_ps(itir)); +// } if(VMX_DOMAIN(v)){ /* Ensure WB attribute if pte is related to a normal mem page, * which is required by vga acceleration since qemu maps shared @@ -460,30 +471,39 @@ void thash_purge_and_insert(VCPU *v, u64 pte &= ~_PAGE_MA_MASK; phy_pte = translate_phy_pte(v, &pte, itir, ifa); - if(ps==PAGE_SHIFT){ + vtlb_purge(v, ifa, ps); + vhpt_purge(v, ifa, ps); + if (ps == mrr.ps) { if(!(pte&VTLB_PTE_IO)){ - vhpt_purge(v, ifa, ps); vmx_vhpt_insert(&v->arch.vhpt, phy_pte, itir, ifa); } else{ - vhpt_purge(v, ifa, ps); - vtlb_insert(&v->arch.vtlb, pte, itir, ifa); + vtlb_insert(v, pte, itir, ifa); vcpu_quick_region_set(PSCBX(v,tc_regions),ifa); } } - else{ - vhpt_purge(v, ifa, ps); - vtlb_insert(&v->arch.vtlb, pte, itir, ifa); + else if (ps > mrr.ps) { + vtlb_insert(v, pte, itir, ifa); vcpu_quick_region_set(PSCBX(v,tc_regions),ifa); if(!(pte&VTLB_PTE_IO)){ vmx_vhpt_insert(&v->arch.vhpt, phy_pte, itir, ifa); } } + else { + u64 psr; + phy_pte &= ~PAGE_FLAGS_RV_MASK; + psr = ia64_clear_ic(); + ia64_itc(type + 1, ifa, phy_pte, ps); + ia64_set_psr(psr); + ia64_srlz_i(); + // ps < mrr.ps, this is not supported + // panic_domain(NULL, "%s: ps (%lx) < mrr.ps \n", __func__, ps); + } } else{ phy_pte = translate_phy_pte(v, &pte, itir, ifa); if(ps!=PAGE_SHIFT){ - vtlb_insert(&v->arch.vtlb, pte, itir, ifa); + vtlb_insert(v, pte, itir, ifa); vcpu_quick_region_set(PSCBX(v,tc_regions),ifa); } machine_tlb_purge(ifa, ps); @@ -505,11 +525,15 @@ void thash_purge_all(VCPU *v) vtlb =&v->arch.vtlb; vhpt =&v->arch.vhpt; + for (num = 0; num < 8; num++) + VMX(v, psbits[num]) = 0; + head=vtlb->hash; num = (vtlb->hash_sz/sizeof(thash_data_t)); do{ head->page_flags = 0; head->etag = 1UL<<63; + head->itir = 0; head->next = 0; head++; num--; @@ -541,7 +565,7 @@ thash_data_t *vtlb_lookup(VCPU *v, u64 v thash_data_t *vtlb_lookup(VCPU *v, u64 va,int is_data) { thash_data_t *cch; - u64 tag; + u64 psbits, ps, tag; ia64_rr vrr; thash_cb_t * hcb= &v->arch.vtlb; @@ -550,15 +574,19 @@ thash_data_t *vtlb_lookup(VCPU *v, u64 v if(vcpu_quick_region_check(v->arch.tc_regions,va)==0) return NULL; - + psbits = VMX(v, psbits[(va >> 61)]); vcpu_get_rr(v,va,&vrr.rrval); - cch = vsa_thash( hcb->pta, va, vrr.rrval, &tag); - - do{ - if(cch->etag == tag) - return cch; - cch = cch->next; - }while(cch); + while (psbits) { + ps = __ffs(psbits); + psbits &= ~(1UL << ps); + vrr.ps = ps; + cch = vsa_thash(hcb->pta, va, vrr.rrval, &tag); + do { + if (cch->etag == tag && cch->ps == ps) + return cch; + cch = cch->next; + } while(cch); + } return NULL; } diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/dom0_ops.c --- a/xen/arch/ia64/xen/dom0_ops.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/dom0_ops.c Sun Aug 13 12:00:38 2006 -0400 @@ -34,129 +34,6 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_ switch ( op->cmd ) { - case DOM0_GETPAGEFRAMEINFO: - { - struct page_info *page; - unsigned long mfn = op->u.getpageframeinfo.gmfn; - domid_t dom = op->u.getpageframeinfo.domain; - struct domain *d; - - ret = -EINVAL; - - if ( unlikely(!mfn_valid(mfn)) || - unlikely((d = find_domain_by_id(dom)) == NULL) ) - break; - - page = 
mfn_to_page(mfn); - - if ( likely(get_page(page, d)) ) - { - ret = 0; - - op->u.getpageframeinfo.type = NOTAB; - - if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) - { - switch ( page->u.inuse.type_info & PGT_type_mask ) - { - default: - panic("No such page type\n"); - break; - } - } - - put_page(page); - } - - put_domain(d); - - copy_to_guest(u_dom0_op, op, 1); - } - break; - - case DOM0_GETPAGEFRAMEINFO2: - { -#define GPF2_BATCH 128 - int n,j; - int num = op->u.getpageframeinfo2.num; - domid_t dom = op->u.getpageframeinfo2.domain; - struct domain *d; - unsigned long *l_arr; - ret = -ESRCH; - - if ( unlikely((d = find_domain_by_id(dom)) == NULL) ) - break; - - if ( unlikely(num > 1024) ) - { - ret = -E2BIG; - break; - } - - l_arr = (unsigned long *)alloc_xenheap_page(); - - ret = 0; - for( n = 0; n < num; ) - { - int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n); - - if ( copy_from_guest_offset(l_arr, op->u.getpageframeinfo2.array, - n, k) ) - { - ret = -EINVAL; - break; - } - - for( j = 0; j < k; j++ ) - { - struct page_info *page; - unsigned long mfn = l_arr[j]; - - if ( unlikely(mfn >= max_page) ) - goto e2_err; - - page = mfn_to_page(mfn); - - if ( likely(get_page(page, d)) ) - { - unsigned long type = 0; - - switch( page->u.inuse.type_info & PGT_type_mask ) - { - default: - panic("No such page type\n"); - break; - } - - if ( page->u.inuse.type_info & PGT_pinned ) - type |= LPINTAB; - l_arr[j] |= type; - put_page(page); - } - else - { - e2_err: - l_arr[j] |= XTAB; - } - - } - - if ( copy_to_guest_offset(op->u.getpageframeinfo2.array, - n, l_arr, k) ) - { - ret = -EINVAL; - break; - } - - n += j; - } - - free_xenheap_page((void *) l_arr); - - put_domain(d); - } - break; - case DOM0_GETMEMLIST: { unsigned long i; @@ -245,6 +122,15 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_ ret = -EINVAL; break; } + if (!d->arch.is_vti) { + struct vcpu *v; + for_each_vcpu(d, v) { + BUG_ON(v->arch.privregs == NULL); + free_domheap_pages(virt_to_page(v->arch.privregs), + get_order_from_shift(XMAPPEDREGS_SHIFT)); + relinquish_vcpu_resources(v); + } + } d->arch.is_vti = 1; vmx_setup_platform(d); } @@ -313,7 +199,6 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_ return ret; } -#ifdef CONFIG_XEN_IA64_DOM0_VP static unsigned long dom0vp_ioremap(struct domain *d, unsigned long mpaddr, unsigned long size) { @@ -374,7 +259,6 @@ do_dom0vp_op(unsigned long cmd, return ret; } -#endif /* * Local variables: diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/dom_fw.c --- a/xen/arch/ia64/xen/dom_fw.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/dom_fw.c Sun Aug 13 12:00:38 2006 -0400 @@ -28,7 +28,6 @@ static void dom_fw_init (struct domain * static void dom_fw_init (struct domain *d, struct ia64_boot_param *bp, char *fw_mem, int fw_mem_size, unsigned long maxmem); extern struct domain *dom0; -extern unsigned long dom0_start; extern unsigned long running_on_sim; @@ -169,12 +168,6 @@ static void dom_fpswa_hypercall_patch(st unsigned long entry_paddr = FW_HYPERCALL_FPSWA_ENTRY_PADDR; unsigned long patch_paddr = FW_HYPERCALL_FPSWA_PATCH_PADDR; -#ifndef CONFIG_XEN_IA64_DOM0_VP - if (d == dom0) { - entry_paddr += dom0_start; - patch_paddr += dom0_start; - } -#endif ASSIGN_NEW_DOMAIN_PAGE_IF_DOM0(d, entry_paddr); ASSIGN_NEW_DOMAIN_PAGE_IF_DOM0(d, patch_paddr); entry_imva = domain_mpa_to_imva(d, entry_paddr); @@ -190,9 +183,6 @@ static void dom_efi_hypercall_patch(stru { unsigned long *imva; -#ifndef CONFIG_XEN_IA64_DOM0_VP - if (d == dom0) paddr += dom0_start; -#endif ASSIGN_NEW_DOMAIN_PAGE_IF_DOM0(d, paddr); 
imva = domain_mpa_to_imva(d, paddr); build_hypercall_bundle(imva, d->arch.breakimm, hypercall, 1); @@ -223,12 +213,6 @@ void dom_fw_setup(struct domain *d, unsi struct ia64_boot_param *bp; dom_fw_base_mpa = 0; -#ifndef CONFIG_XEN_IA64_DOM0_VP - if (d == dom0) { - dom_fw_base_mpa += dom0_start; - bp_mpa += dom0_start; - } -#endif ASSIGN_NEW_DOMAIN_PAGE_IF_DOM0(d, dom_fw_base_mpa); imva_fw_base = (unsigned long) domain_mpa_to_imva(d, dom_fw_base_mpa); ASSIGN_NEW_DOMAIN_PAGE_IF_DOM0(d, bp_mpa); @@ -269,10 +253,14 @@ acpi_update_lsapic (acpi_table_entry_hea enable = 0; if (lsapic->flags.enabled && enable) { printk("enable lsapic entry: 0x%lx\n", (u64)lsapic); + lsapic->id = lsapic_nbr; + lsapic->eid = 0; lsapic_nbr++; } else if (lsapic->flags.enabled) { printk("DISABLE lsapic entry: 0x%lx\n", (u64)lsapic); lsapic->flags.enabled = 0; + lsapic->id = 0; + lsapic->eid = 0; } return 0; } @@ -342,6 +330,7 @@ dom_fw_fake_acpi(struct domain *d, struc struct acpi_table_lsapic *lsapic = tables->lsapic; int i; int aml_len; + int nbr_cpus; memset(tables, 0, sizeof(struct fake_acpi_tables)); @@ -452,8 +441,6 @@ dom_fw_fake_acpi(struct domain *d, struc /* setup MADT */ strncpy(madt->header.signature, APIC_SIG, 4); madt->header.revision = 2; - madt->header.length = sizeof(struct acpi_table_madt) + - MAX_VIRT_CPUS * sizeof(struct acpi_table_lsapic); strcpy(madt->header.oem_id, "XEN"); strcpy(madt->header.oem_table_id, "Xen/ia64"); strcpy(madt->header.asl_compiler_id, "XEN"); @@ -461,15 +448,20 @@ dom_fw_fake_acpi(struct domain *d, struc xen_minor_version(); /* An LSAPIC entry describes a CPU. */ + nbr_cpus = 0; for (i = 0; i < MAX_VIRT_CPUS; i++) { lsapic[i].header.type = ACPI_MADT_LSAPIC; lsapic[i].header.length = sizeof(struct acpi_table_lsapic); lsapic[i].acpi_id = i; lsapic[i].id = i; lsapic[i].eid = 0; - lsapic[i].flags.enabled = (d->vcpu[i] != NULL); - } - + if (d->vcpu[i] != NULL) { + lsapic[i].flags.enabled = 1; + nbr_cpus++; + } + } + madt->header.length = sizeof(struct acpi_table_madt) + + nbr_cpus * sizeof(struct acpi_table_lsapic); madt->header.checksum = generate_acpi_checksum(madt, madt->header.length); return; @@ -479,10 +471,8 @@ dom_fw_fake_acpi(struct domain *d, struc #define NUM_MEM_DESCS 64 //large enough struct dom0_passthrough_arg { -#ifdef CONFIG_XEN_IA64_DOM0_VP struct domain* d; int flags; -#endif efi_memory_desc_t *md; int* i; }; @@ -492,8 +482,6 @@ dom_fw_dom0_passthrough(efi_memory_desc_ { struct dom0_passthrough_arg* arg = (struct dom0_passthrough_arg*)arg__; unsigned long paddr; - -#ifdef CONFIG_XEN_IA64_DOM0_VP struct domain* d = arg->d; u64 start = md->phys_addr; u64 size = md->num_pages << EFI_PAGE_SHIFT; @@ -511,9 +499,6 @@ dom_fw_dom0_passthrough(efi_memory_desc_ paddr = assign_domain_mmio_page(d, start, size); } else paddr = assign_domain_mach_page(d, start, size, arg->flags); -#else - paddr = md->phys_addr; -#endif BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && md->type != EFI_RUNTIME_SERVICES_DATA && @@ -626,11 +611,7 @@ dom_fw_init (struct domain *d, struct ia unsigned char checksum = 0; char *cp, *fw_vendor; int num_mds, j, i = 0; -#ifdef CONFIG_XEN_IA64_DOM0_VP const unsigned long start_mpaddr = 0; -#else - const unsigned long start_mpaddr = ((d==dom0)?dom0_start:0); -#endif /* FIXME: should check size but for now we have a whole MB to play with. And if stealing code from fw-emu.c, watch out for new fw_vendor on the end! 
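The MADT fix-up above now sizes madt->header.length from the number of enabled LSAPIC entries (nbr_cpus) instead of MAX_VIRT_CPUS, then recomputes the checksum. The rule generate_acpi_checksum() has to satisfy is the standard ACPI one: every byte of the table, checksum included, must sum to zero modulo 256. A minimal stand-alone sketch follows; the table below is a toy, and only the offset-9 checksum byte matches real ACPI system description table headers.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Value that makes all bytes of the table sum to zero mod 256. */
static uint8_t acpi_checksum(const void *table, size_t len)
{
    const uint8_t *p = table;
    uint8_t sum = 0;
    size_t i;

    for (i = 0; i < len; i++)
        sum += p[i];
    return (uint8_t)(0x100 - sum);
}

int main(void)
{
    uint8_t table[64];
    uint8_t sum = 0;
    size_t i;

    memset(table, 0xAB, sizeof(table));
    table[9] = 0;                    /* checksum field must be zero while summing */
    table[9] = acpi_checksum(table, sizeof(table));

    for (i = 0; i < sizeof(table); i++)
        sum += table[i];
    printf("total mod 256 = %u\n", sum);   /* prints 0 */
    return 0;
}

Zeroing the checksum field before summing matters because the field itself is part of the sum, which is also why the length must be final before the checksum is generated.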
@@ -796,52 +777,24 @@ dom_fw_init (struct domain *d, struct ia dom_pa((unsigned long)fw_mem + fw_mem_size), 1); if (d == dom0) { -#ifndef CONFIG_XEN_IA64_DOM0_VP - /* - * This is a bad hack. Dom0 may share other domains' memory - * through a dom0 physical address. Unfortunately, this - * address may be used in maddr_to_page (e.g. in the loopback - * driver) but when Linux initializes memory it only creates - * page structs for the physical memory it knows about. And - * on ia64, only for full writeback granules. So, we reserve - * the last full granule of Xen's memory for dom0 (in - * start_kernel) to ensure dom0 creates a large enough memmap - */ - unsigned long last_start = max_page << PAGE_SHIFT; - unsigned long last_end = last_start + IA64_GRANULE_SIZE; - - /* simulate 1MB free memory at physical address zero */ - MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0);//XXX -#endif /* hypercall patches live here, masquerade as reserved PAL memory */ MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB|EFI_MEMORY_RUNTIME,HYPERCALL_START,HYPERCALL_END, 0); - -#ifndef CONFIG_XEN_IA64_DOM0_VP - MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem-IA64_GRANULE_SIZE, 0);//XXX make sure this doesn't overlap on i/o, runtime area. -/* hack */ MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,last_start,last_end,1); -#endif /* pass through the I/O port space */ if (!running_on_sim) { struct dom0_passthrough_arg arg; arg.md = &efi_memmap[i]; arg.i = &i; -#ifdef CONFIG_XEN_IA64_DOM0_VP arg.d = d; arg.flags = ASSIGN_writable; -#endif //XXX Is this needed? efi_memmap_walk_type(EFI_RUNTIME_SERVICES_CODE, dom_fw_dom0_passthrough, &arg); // for ACPI table. -#ifdef CONFIG_XEN_IA64_DOM0_VP arg.flags = ASSIGN_readonly; -#endif efi_memmap_walk_type(EFI_RUNTIME_SERVICES_DATA, dom_fw_dom0_passthrough, &arg); -#ifdef CONFIG_XEN_IA64_DOM0_VP arg.flags = ASSIGN_writable; -#endif efi_memmap_walk_type(EFI_ACPI_RECLAIM_MEMORY, dom_fw_dom0_passthrough, &arg); efi_memmap_walk_type(EFI_ACPI_MEMORY_NVS, @@ -857,12 +810,6 @@ dom_fw_init (struct domain *d, struct ia } else MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0); } else { -#ifndef CONFIG_XEN_IA64_DOM0_VP - /* Dom0 maps legacy mmio in first MB. 
*/ - MAKE_MD(EFI_LOADER_DATA, EFI_MEMORY_WB, 0*MB, 1*MB, 1); - MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, - HYPERCALL_END, maxmem, 1); -#endif /* hypercall patches live here, masquerade as reserved PAL memory */ MAKE_MD(EFI_PAL_CODE, EFI_MEMORY_WB | EFI_MEMORY_RUNTIME, @@ -873,7 +820,6 @@ dom_fw_init (struct domain *d, struct ia MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0); } -#ifdef CONFIG_XEN_IA64_DOM0_VP // simple // MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, // HYPERCALL_END, maxmem, 0); @@ -916,7 +862,6 @@ dom_fw_init (struct domain *d, struct ia if (next_start >= maxmem) break; } -#endif sort(efi_memmap, i, sizeof(efi_memory_desc_t), efi_mdt_cmp, NULL); bp->efi_systab = dom_pa((unsigned long) fw_mem); diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/domain.c Sun Aug 13 12:00:38 2006 -0400 @@ -49,10 +49,6 @@ #include <asm/shadow.h> #include <asm/privop_stat.h> -#ifndef CONFIG_XEN_IA64_DOM0_VP -#define CONFIG_DOMAIN0_CONTIGUOUS -#endif -unsigned long dom0_start = -1L; unsigned long dom0_size = 512*1024*1024; unsigned long dom0_align = 64*1024*1024; @@ -136,7 +132,6 @@ void context_switch(struct vcpu *prev, s uint64_t pta; local_irq_save(spsr); - context_switch_count++; __ia64_save_fpu(prev->arch._thread.fph); __ia64_load_fpu(next->arch._thread.fph); @@ -150,16 +145,6 @@ void context_switch(struct vcpu *prev, s /* Note: ia64_switch_to does not return here at vcpu initialization. */ //cpu_set(smp_processor_id(), current->domain->domain_dirty_cpumask); - -// leave this debug for now: it acts as a heartbeat when more than -// one domain is active -{ -static long cnt[16] = { 50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50}; -static int i = 100; -int id = ((struct vcpu *)current)->domain->domain_id & 0xf; -if (!cnt[id]--) { cnt[id] = 500000; printk("%x",id); } -if (!i--) { i = 1000000; printk("+"); } -} if (VMX_DOMAIN(current)){ vmx_load_all_rr(current); @@ -236,6 +221,14 @@ void startup_cpu_idle_loop(void) continue_cpu_idle_loop(); } +/* compile time test for get_order(sizeof(mapped_regs_t)) != + * get_order_from_shift(XMAPPEDREGS_SHIFT)) + */ +#if !(((1 << (XMAPPEDREGS_SHIFT - 1)) < MAPPED_REGS_T_SIZE) && \ + (MAPPED_REGS_T_SIZE < (1 << (XMAPPEDREGS_SHIFT + 1)))) +# error "XMAPPEDREGS_SHIFT doesn't match sizeof(mapped_regs_t)." +#endif + struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id) { struct vcpu *v; @@ -261,13 +254,17 @@ struct vcpu *alloc_vcpu_struct(struct do if (!is_idle_domain(d)) { if (!d->arch.is_vti) { - /* Create privregs page only if not VTi. */ - v->arch.privregs = - alloc_xenheap_pages(get_order(sizeof(mapped_regs_t))); + int order; + int i; + + /* Create privregs page only if not VTi. 
*/ + order = get_order_from_shift(XMAPPEDREGS_SHIFT); + v->arch.privregs = alloc_xenheap_pages(order); BUG_ON(v->arch.privregs == NULL); - memset(v->arch.privregs, 0, PAGE_SIZE); - share_xen_page_with_guest(virt_to_page(v->arch.privregs), - d, XENSHARE_writable); + memset(v->arch.privregs, 0, 1 << XMAPPEDREGS_SHIFT); + for (i = 0; i < (1 << order); i++) + share_xen_page_with_guest(virt_to_page(v->arch.privregs) + + i, d, XENSHARE_writable); } v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0; @@ -295,15 +292,21 @@ struct vcpu *alloc_vcpu_struct(struct do return v; } +void relinquish_vcpu_resources(struct vcpu *v) +{ + if (v->arch.privregs != NULL) { + free_xenheap_pages(v->arch.privregs, + get_order_from_shift(XMAPPEDREGS_SHIFT)); + v->arch.privregs = NULL; + } +} + void free_vcpu_struct(struct vcpu *v) { if (VMX_DOMAIN(v)) vmx_relinquish_vcpu_resources(v); - else { - if (v->arch.privregs != NULL) - free_xenheap_pages(v->arch.privregs, - get_order_from_shift(XMAPPEDREGS_SHIFT)); - } + else + relinquish_vcpu_resources(v); free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER); } @@ -516,9 +519,7 @@ static void relinquish_memory(struct dom /* Follow the list chain and /then/ potentially free the page. */ ent = ent->next; -#ifdef CONFIG_XEN_IA64_DOM0_VP BUG_ON(get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY); -#endif put_page(page); } @@ -770,24 +771,6 @@ static void loaddomainelfimage(struct do elfaddr = (unsigned long) elfbase + phdr.p_offset; dom_mpaddr = phdr.p_paddr; -//printf("p_offset: %x, size=%x\n",elfaddr,filesz); -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - if (d == dom0) { - if (dom_mpaddr+memsz>dom0_size) - panic("Dom0 doesn't fit in memory space!\n"); - dom_imva = __va_ul(dom_mpaddr + dom0_start); - memcpy((void *)dom_imva, (void *)elfaddr, filesz); - if (memsz > filesz) - memset((void *)dom_imva+filesz, 0, - memsz-filesz); -//FIXME: This test for code seems to find a lot more than objdump -x does - if (phdr.p_flags & PF_X) { - privify_memory(dom_imva,filesz); - flush_icache_range (dom_imva, dom_imva+filesz); - } - } - else -#endif while (memsz > 0) { p = assign_new_domain_page(d,dom_mpaddr); BUG_ON (unlikely(p == NULL)); @@ -852,27 +835,10 @@ void alloc_dom0(void) if (running_on_sim) { dom0_size = 128*1024*1024; //FIXME: Should be configurable } -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - printf("alloc_dom0: starting (initializing %lu MB...)\n",dom0_size/(1024*1024)); - - /* FIXME: The first trunk (say 256M) should always be assigned to - * Dom0, since Dom0's physical == machine address for DMA purpose. - * Some old version linux, like 2.4, assumes physical memory existing - * in 2nd 64M space. 
+ + /* no need to allocate pages for now + * pages are allocated by map_new_domain_page() via loaddomainelfimage() */ - dom0_start = alloc_boot_pages(dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT); - dom0_start <<= PAGE_SHIFT; - if (!dom0_start) { - panic("alloc_dom0: can't allocate contiguous memory size=%lu\n", - dom0_size); - } - printf("alloc_dom0: dom0_start=0x%lx\n", dom0_start); -#else - // no need to allocate pages for now - // pages are allocated by map_new_domain_page() via loaddomainelfimage() - dom0_start = 0; -#endif - } @@ -898,7 +864,6 @@ int construct_dom0(struct domain *d, char *cmdline) { int i, rc; - unsigned long alloc_start, alloc_end; start_info_t *si; struct vcpu *v = d->vcpu[0]; unsigned long max_pages; @@ -931,15 +896,9 @@ int construct_dom0(struct domain *d, printk("*** LOADING DOMAIN 0 ***\n"); - alloc_start = dom0_start; - alloc_end = dom0_start + dom0_size; max_pages = dom0_size / PAGE_SIZE; d->max_pages = max_pages; -#ifndef CONFIG_XEN_IA64_DOM0_VP - d->tot_pages = d->max_pages; -#else d->tot_pages = 0; -#endif dsi.image_addr = (unsigned long)image_start; dsi.image_len = image_len; rc = parseelfimage(&dsi); @@ -980,8 +939,7 @@ int construct_dom0(struct domain *d, if(initrd_start && initrd_len){ unsigned long offset; - pinitrd_start= (dom0_start + dom0_size) - - (PAGE_ALIGN(initrd_len) + 4*1024*1024); + pinitrd_start= dom0_size - (PAGE_ALIGN(initrd_len) + 4*1024*1024); if (pinitrd_start <= pstart_info) panic("%s:enough memory is not assigned to dom0", __func__); @@ -1036,25 +994,6 @@ int construct_dom0(struct domain *d, if (alloc_vcpu(d, i, i) == NULL) printf ("Cannot allocate dom0 vcpu %d\n", i); -#if defined(VALIDATE_VT) && !defined(CONFIG_XEN_IA64_DOM0_VP) - /* Construct a frame-allocation list for the initial domain, since these - * pages are allocated by boot allocator and pfns are not set properly - */ - for ( mfn = (alloc_start>>PAGE_SHIFT); - mfn < (alloc_end>>PAGE_SHIFT); - mfn++ ) - { - page = mfn_to_page(mfn); - page_set_owner(page, d); - page->u.inuse.type_info = 0; - page->count_info = PGC_allocated | 1; - list_add_tail(&page->list, &d->page_list); - - /* Construct 1:1 mapping */ - set_gpfn_from_mfn(mfn, mfn); - } -#endif - /* Copy the OS image. */ loaddomainelfimage(d,image_start); @@ -1106,17 +1045,14 @@ int construct_dom0(struct domain *d, bp->console_info.orig_y = bp->console_info.num_rows == 0 ? 0 : bp->console_info.num_rows - 1; - bp->initrd_start = (dom0_start+dom0_size) - - (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024); + bp->initrd_start = dom0_size - + (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024); bp->initrd_size = ia64_boot_param->initrd_size; vcpu_init_regs (v); vcpu_regs(v)->r28 = bp_mpa; -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - pkern_entry += dom0_start; -#endif vcpu_regs (v)->cr_iip = pkern_entry; physdev_init_dom0(d); diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/faults.c --- a/xen/arch/ia64/xen/faults.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/faults.c Sun Aug 13 12:00:38 2006 -0400 @@ -51,8 +51,6 @@ extern IA64FAULT ia64_hypercall(struct p extern void do_ssc(unsigned long ssc, struct pt_regs *regs); -#define inc_slow_reflect_count(vec) slow_reflect_count[vec>>8]++; - // should never panic domain... 
if it does, stack may have been overrun void check_bad_nested_interruption(unsigned long isr, struct pt_regs *regs, unsigned long vector) { @@ -92,7 +90,7 @@ void reflect_interruption(unsigned long v->vcpu_info->evtchn_upcall_mask = 1; PSCB(v,interrupt_collection_enabled) = 0; - inc_slow_reflect_count(vector); + perfc_incra(slow_reflect, vector >> 8); } static unsigned long pending_false_positive = 0; @@ -170,7 +168,7 @@ handle_lazy_cover(struct vcpu *v, struct PSCB(v,ifs) = regs->cr_ifs; PSCB(v,incomplete_regframe) = 1; regs->cr_ifs = 0; - lazy_cover_count++; + perfc_incrc(lazy_cover); return(1); // retry same instruction with cr.ifs off } return(0); @@ -247,7 +245,7 @@ void ia64_do_page_fault (unsigned long a regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; // NOTE: nested trap must NOT pass PSCB address //regs->r31 = (unsigned long) &PSCB(current); - inc_slow_reflect_count(fault); + perfc_incra(slow_reflect, fault >> 8); return; } @@ -325,8 +323,10 @@ handle_fpu_swa (int fp_fault, struct pt_ if (!fp_fault && (ia64_psr(regs)->ri == 0)) fault_ip -= 16; - if (VMX_DOMAIN(current)) - bundle = __vmx_get_domain_bundle(fault_ip); + if (VMX_DOMAIN(current)) { + if (IA64_RETRY == __vmx_get_domain_bundle(fault_ip, &bundle)) + return IA64_RETRY; + } else bundle = __get_domain_bundle(fault_ip); @@ -557,6 +557,7 @@ ia64_handle_reflection (unsigned long if struct vcpu *v = current; unsigned long check_lazy_cover = 0; unsigned long psr = regs->cr_ipsr; + unsigned long status; /* Following faults shouldn'g be seen from Xen itself */ BUG_ON (!(psr & IA64_PSR_CPL)); @@ -617,14 +618,23 @@ ia64_handle_reflection (unsigned long if // FIXME: Should we handle unaligned refs in Xen?? vector = IA64_UNALIGNED_REF_VECTOR; break; case 32: - if (!(handle_fpu_swa(1, regs, isr))) { + status = handle_fpu_swa(1, regs, isr); + if (!status) { vcpu_increment_iip(v); return; } + // fetch code fail + if (IA64_RETRY == status) + return; printf("ia64_handle_reflection: handling FP fault\n"); vector = IA64_FP_FAULT_VECTOR; break; case 33: - if (!(handle_fpu_swa(0, regs, isr))) return; + status = handle_fpu_swa(0, regs, isr); + if (!status) + return; + // fetch code fail + if (IA64_RETRY == status) + return; printf("ia64_handle_reflection: handling FP trap\n"); vector = IA64_FP_TRAP_VECTOR; break; case 34: diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/hypercall.c --- a/xen/arch/ia64/xen/hypercall.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/hypercall.c Sun Aug 13 12:00:38 2006 -0400 @@ -70,7 +70,7 @@ hypercall_t ia64_hypercall_table[] = (hypercall_t)do_ni_hypercall, /* */ (hypercall_t)do_event_channel_op, (hypercall_t)do_physdev_op, - (hypercall_t)do_ni_hypercall, /* */ + (hypercall_t)do_hvm_op, /* */ (hypercall_t)do_ni_hypercall, /* */ /* 35 */ (hypercall_t)do_ni_hypercall, /* */ (hypercall_t)do_ni_hypercall, /* */ @@ -84,11 +84,7 @@ hypercall_t ia64_hypercall_table[] = (hypercall_t)do_ni_hypercall, /* */ /* 45 */ (hypercall_t)do_ni_hypercall, /* */ (hypercall_t)do_ni_hypercall, /* */ -#ifdef CONFIG_XEN_IA64_DOM0_VP (hypercall_t)do_dom0vp_op, /* dom0vp_op */ -#else - (hypercall_t)do_ni_hypercall, /* arch_0 */ -#endif (hypercall_t)do_ni_hypercall, /* arch_1 */ (hypercall_t)do_ni_hypercall, /* arch_2 */ /* 50 */ (hypercall_t)do_ni_hypercall, /* arch_3 */ @@ -210,7 +206,7 @@ fw_hypercall (struct pt_regs *regs) if (regs->r28 == PAL_HALT_LIGHT) { if (vcpu_deliverable_interrupts(v) || event_pending(v)) { - idle_when_pending++; + perfc_incrc(idle_when_pending); 
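The counter changes in faults.c and hypercall.c above replace ad-hoc globals (idle_when_pending, pal_halt_light_count, slow_reflect_count[]) with the generic perfc_incrc()/perfc_incra() macros. As a rough stand-in only, assuming a single flat counter array rather than Xen's real per-CPU perf-counter layout, the two macros reduce to plain and indexed increments:

#include <stdio.h>

enum perfcounter {
    PERFC_idle_when_pending,
    PERFC_pal_halt_light,
    PERFC_slow_reflect,                          /* base of a 0x80-entry block */
    PERFC_slow_reflect_end = PERFC_slow_reflect + 0x80 - 1,
    NR_PERFCTRS
};

static unsigned int perfcounters[NR_PERFCTRS];

#define perfc_incrc(x)     (perfcounters[PERFC_##x]++)
#define perfc_incra(x, i)  (perfcounters[PERFC_##x + (i)]++)

int main(void)
{
    unsigned long vector = 0x3000;

    perfc_incrc(pal_halt_light);
    perfc_incra(slow_reflect, vector >> 8);      /* one slot per 0x100 vector block */

    printf("pal_halt_light=%u slow_reflect[0x30]=%u\n",
           perfcounters[PERFC_pal_halt_light],
           perfcounters[PERFC_slow_reflect + 0x30]);
    return 0;
}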
vcpu_pend_unspecified_interrupt(v); //printf("idle w/int#%d pending!\n",pi); //this shouldn't happen, but it apparently does quite a bit! so don't @@ -219,7 +215,7 @@ fw_hypercall (struct pt_regs *regs) //as deliver_pending_interrupt is called on the way out and will deliver it } else { - pal_halt_light_count++; + perfc_incrc(pal_halt_light); do_sched_op_compat(SCHEDOP_yield, 0); } regs->r8 = 0; @@ -319,7 +315,7 @@ ia64_hypercall (struct pt_regs *regs) /* Hypercalls are only allowed by kernel. Kernel checks memory accesses. */ - if (privlvl != 2) { + if (VMX_DOMAIN(v) ? (privlvl != 0) : (privlvl != 2)) { /* FIXME: Return a better error value ? Reflection ? Illegal operation ? */ regs->r8 = -1; @@ -335,7 +331,7 @@ unsigned long hypercall_create_continuat unsigned long hypercall_create_continuation( unsigned int op, const char *format, ...) { - struct mc_state *mcs = &mc_state[smp_processor_id()]; + struct mc_state *mcs = &this_cpu(mc_state); struct vcpu *v = current; const char *p = format; unsigned long arg; diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/hyperprivop.S --- a/xen/arch/ia64/xen/hyperprivop.S Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/hyperprivop.S Sun Aug 13 12:00:38 2006 -0400 @@ -22,34 +22,29 @@ #define _PAGE_PL_2 (2<<7) #if 1 // change to 0 to turn off all fast paths -#define FAST_HYPERPRIVOPS -#define FAST_HYPERPRIVOP_CNT -#define FAST_REFLECT_CNT +# define FAST_HYPERPRIVOPS +# ifdef PERF_COUNTERS +# define FAST_HYPERPRIVOP_CNT +# define FAST_HYPERPRIVOP_PERFC(N) \ + (perfcounters + FAST_HYPERPRIVOP_PERFC_OFS + (4 * N)) +# define FAST_REFLECT_CNT +# endif + //#define FAST_TICK // mostly working (unat problems) but default off for now //#define FAST_TLB_MISS_REFLECT // mostly working but default off for now -#ifdef CONFIG_XEN_IA64_DOM0_VP -#undef FAST_ITC //XXX CONFIG_XEN_IA64_DOM0_VP - // TODO fast_itc doesn't suport dom0 vp yet. -#else -//#define FAST_ITC // to be reviewed -#endif -#define FAST_BREAK -#ifndef CONFIG_XEN_IA64_DOM0_VP -# define FAST_ACCESS_REFLECT -#else -# undef FAST_ACCESS_REFLECT //XXX CONFIG_XEN_IA64_DOM0_VP - // TODO fast_access_reflect +# undef FAST_ITC //XXX TODO fast_itc doesn't suport dom0 vp yet. +# define FAST_BREAK +# undef FAST_ACCESS_REFLECT //XXX TODO fast_access_reflect // doesn't support dom0 vp yet. -#endif -#define FAST_RFI -#define FAST_SSM_I -#define FAST_PTC_GA -#undef RFI_TO_INTERRUPT // not working yet +# define FAST_RFI +# define FAST_SSM_I +# define FAST_PTC_GA +# undef RFI_TO_INTERRUPT // not working yet #endif #ifdef CONFIG_SMP -//#warning "FIXME: ptc.ga instruction requires spinlock for SMP" -#undef FAST_PTC_GA + //#warning "FIXME: ptc.ga instruction requires spinlock for SMP" + #undef FAST_PTC_GA #endif // FIXME: turn off for now... but NaTs may crash Xen so re-enable soon! 
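The hyperprivop.S changes above and in the hunks that follow swap the old 64-bit fast_hyperpriv_cnt[] globals (ld8/st8, stride 8) for 32-bit perf-counter slots addressed as perfcounters + FAST_HYPERPRIVOP_PERFC_OFS + 4*N (ld4/st4, stride 4). A C view of that address arithmetic is sketched below; the byte offset is made up, and the opcode number just follows the hyperpriv_str ordering that appears later in this patch.

#include <assert.h>
#include <stdint.h>

#define FAST_HYPERPRIVOP_PERFC_OFS  64       /* hypothetical byte offset */
#define HYPERPRIVOP_SSM_I           7        /* per the hyperpriv_str table order */

static uint32_t perfcounters[256];

/* FAST_HYPERPRIVOP_PERFC(N): address of the Nth 32-bit slot */
static uint32_t *fast_hyperprivop_perfc(unsigned int n)
{
    return (uint32_t *)((char *)perfcounters +
                        FAST_HYPERPRIVOP_PERFC_OFS + 4 * n);
}

int main(void)
{
    uint32_t *slot = fast_hyperprivop_perfc(HYPERPRIVOP_SSM_I);

    (*slot)++;                               /* the ld4 / adds 1 / st4 sequence */
    assert(slot == &perfcounters[FAST_HYPERPRIVOP_PERFC_OFS / 4 +
                                 HYPERPRIVOP_SSM_I]);
    return 0;
}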
@@ -237,10 +232,10 @@ ENTRY(hyper_ssm_i) cmp.ne p7,p0=r21,r0 (p7) br.sptk.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SSM_I);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_I);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif // set shared_mem iip to instruction after HYPER_SSM_I extr.u r20=r30,41,2 ;; @@ -373,10 +368,10 @@ GLOBAL_ENTRY(fast_tick_reflect) mov rp=r29;; mov cr.itm=r26;; // ensure next tick #ifdef FAST_REFLECT_CNT - movl r20=fast_reflect_count+((0x3000>>8)*8);; - ld8 r21=[r20];; + movl r20=perfcounters+FAST_REFLECT_PERFC_OFS+((0x3000>>8)*4);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif // vcpu_pend_timer(current) movl r18=THIS_CPU(current_psr_ic_addr) @@ -611,12 +606,12 @@ END(fast_break_reflect) // r31 == pr ENTRY(fast_reflect) #ifdef FAST_REFLECT_CNT - movl r22=fast_reflect_count; - shr r23=r20,5;; + movl r22=perfcounters+FAST_REFLECT_PERFC_OFS; + shr r23=r20,8-2;; add r22=r22,r23;; - ld8 r21=[r22];; + ld4 r21=[r22];; adds r21=1,r21;; - st8 [r22]=r21;; + st4 [r22]=r21;; #endif // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!) adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;; @@ -948,10 +943,10 @@ 1: // check the guest VHPT (p7) br.cond.spnt.few page_not_present;; #ifdef FAST_REFLECT_CNT - movl r21=fast_vhpt_translate_count;; - ld8 r22=[r21];; + movl r21=perfcounter+FAST_VHPT_TRANSLATE_PERFC_OFS;; + ld4 r22=[r21];; adds r22=1,r22;; - st8 [r21]=r22;; + st4 [r21]=r22;; #endif // prepare for fast_insert(PSCB(ifa),PSCB(itir),r16=pte) @@ -977,11 +972,11 @@ END(fast_tlb_miss_reflect) // we get here if fast_insert fails (e.g. due to metaphysical lookup) ENTRY(recover_and_page_fault) -#ifdef FAST_REFLECT_CNT - movl r21=recover_to_page_fault_count;; - ld8 r22=[r21];; +#ifdef PERF_COUNTERS + movl r21=perfcounters + RECOVER_TO_PAGE_FAULT_PERFC_OFS;; + ld4 r22=[r21];; adds r22=1,r22;; - st8 [r21]=r22;; + st4 [r21]=r22;; #endif mov b0=r29;; br.cond.sptk.many page_fault;; @@ -1083,10 +1078,10 @@ 1: 1: // OK now, let's do an rfi. #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_RFI);; - ld8 r23=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RFI);; + ld4 r23=[r20];; adds r23=1,r23;; - st8 [r20]=r23;; + st4 [r20]=r23;; #endif #ifdef RFI_TO_INTERRUPT // maybe do an immediate interrupt delivery? 
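In the fast_reflect hunk above the index shift changes along with the slot size: with 8-byte counters the byte offset of slot vector>>8 was (vector>>8)*8, i.e. vector>>5, while with 4-byte slots it is (vector>>8)*4, i.e. vector>>6, which is what "shr r23=r20,8-2" computes. A quick self-check of that identity for vectors that are multiples of 0x100, as ia64 interruption vectors are:

#include <assert.h>

int main(void)
{
    unsigned long vector;

    for (vector = 0; vector < 0x8000; vector += 0x100) {
        assert(((vector >> 8) * 8) == (vector >> 5));   /* old 8-byte stride */
        assert(((vector >> 8) * 4) == (vector >> 6));   /* new 4-byte stride */
    }
    return 0;
}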
@@ -1339,10 +1334,10 @@ END(rfi_with_interrupt) ENTRY(hyper_cover) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_COVER);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_COVER);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif mov r24=cr.ipsr mov r25=cr.iip;; @@ -1375,10 +1370,10 @@ END(hyper_cover) // return from metaphysical mode (meta=1) to virtual mode (meta=0) ENTRY(hyper_ssm_dt) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SSM_DT);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_DT);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif mov r24=cr.ipsr mov r25=cr.iip;; @@ -1412,10 +1407,10 @@ END(hyper_ssm_dt) // go to metaphysical mode (meta=1) from virtual mode (meta=0) ENTRY(hyper_rsm_dt) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_RSM_DT);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RSM_DT);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif mov r24=cr.ipsr mov r25=cr.iip;; @@ -1449,10 +1444,10 @@ END(hyper_rsm_dt) ENTRY(hyper_get_tpr) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_TPR);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_TPR);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif mov r24=cr.ipsr mov r25=cr.iip;; @@ -1478,10 +1473,10 @@ END(hyper_get_tpr) // (or accidentally missing) delivering an interrupt ENTRY(hyper_set_tpr) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_TPR);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_TPR);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif mov r24=cr.ipsr mov r25=cr.iip;; @@ -1506,10 +1501,10 @@ END(hyper_set_tpr) ENTRY(hyper_get_ivr) #ifdef FAST_HYPERPRIVOP_CNT - movl r22=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_IVR);; - ld8 r21=[r22];; + movl r22=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_IVR);; + ld4 r21=[r22];; adds r21=1,r21;; - st8 [r22]=r21;; + st4 [r22]=r21;; #endif mov r8=15;; // when we get to here r20=~=interrupts pending @@ -1618,10 +1613,10 @@ ENTRY(hyper_eoi) cmp.ne p7,p0=r20,r0 (p7) br.spnt.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_EOI);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_EOI);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; ld8 r22=[r22];; @@ -1682,10 +1677,10 @@ ENTRY(hyper_set_itm) cmp.ne p7,p0=r20,r0 (p7) br.spnt.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_ITM);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_ITM);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;; ld8 r21=[r20];; @@ -1723,10 +1718,10 @@ END(hyper_set_itm) ENTRY(hyper_get_rr) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_GET_RR);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_GET_RR);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif extr.u r25=r8,61,3;; adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;; @@ -1755,10 +1750,10 @@ ENTRY(hyper_set_rr) cmp.leu p7,p0=7,r25 // punt on setting rr7 (p7) br.spnt.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT - movl 
r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_RR);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_RR);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif extr.u r26=r9,8,24 // r26 = r9.rid movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; @@ -1813,10 +1808,10 @@ ENTRY(hyper_set_kr) cmp.ne p7,p0=r0,r25 // if kr# > 7, go slow way (p7) br.spnt.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_SET_KR);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_KR);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif adds r21=XSI_KR0_OFS-XSI_PSR_IC_OFS,r18 ;; shl r20=r8,3;; @@ -1871,10 +1866,10 @@ END(hyper_set_kr) // r31 == pr ENTRY(hyper_thash) #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_THASH);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_THASH);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif shr.u r20 = r8, 61 addl r25 = 1, r0 @@ -1940,10 +1935,10 @@ ENTRY(hyper_ptc_ga) #endif // FIXME: validate not flushing Xen addresses #ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_PTC_GA);; - ld8 r21=[r20];; + movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_PTC_GA);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif mov r28=r8 extr.u r19=r9,2,6 // addr_range=1<<((r9&0xfc)>>2) @@ -2009,11 +2004,11 @@ END(hyper_ptc_ga) // recovery block for hyper_itc metaphysical memory lookup ENTRY(recover_and_dispatch_break_fault) -#ifdef FAST_REFLECT_CNT - movl r21=recover_to_break_fault_count;; - ld8 r22=[r21];; +#ifdef PERF_COUNTERS + movl r21=perfcounters + RECOVER_TO_BREAK_FAULT_PERFC_OFS;; + ld4 r22=[r21];; adds r22=1,r22;; - st8 [r21]=r22;; + st4 [r21]=r22;; #endif mov b0=r29 ;; br.sptk.many dispatch_break_fault;; @@ -2054,11 +2049,11 @@ hyper_itc_d: (p7) br.spnt.many dispatch_break_fault ;; #ifdef FAST_HYPERPRIVOP_CNT cmp.eq p6,p7=HYPERPRIVOP_ITC_D,r17;; -(p6) movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_ITC_D);; -(p7) movl r20=fast_hyperpriv_cnt+(8*HYPERPRIVOP_ITC_I);; - ld8 r21=[r20];; +(p6) movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_D);; +(p7) movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_I);; + ld4 r21=[r20];; adds r21=1,r21;; - st8 [r20]=r21;; + st4 [r20]=r21;; #endif (p6) mov r17=2;; (p7) mov r17=3;; diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/mm.c Sun Aug 13 12:00:38 2006 -0400 @@ -173,12 +173,8 @@ #include <asm/shadow.h> #include <linux/efi.h> -#ifndef CONFIG_XEN_IA64_DOM0_VP -#define CONFIG_DOMAIN0_CONTIGUOUS -#else static void domain_page_flush(struct domain* d, unsigned long mpaddr, unsigned long old_mfn, unsigned long new_mfn); -#endif extern unsigned long ia64_iobase; @@ -268,12 +264,11 @@ relinquish_pte(struct domain* d, pte_t* return; } -#ifdef CONFIG_XEN_IA64_DOM0_VP if (page_get_owner(page) == d) { BUG_ON(get_gpfn_from_mfn(mfn) == INVALID_M2P_ENTRY); set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); } -#endif + try_to_clear_PGC_allocate(d, page); put_page(page); } @@ -397,10 +392,6 @@ gmfn_to_mfn_foreign(struct domain *d, un { unsigned long pte; -#ifndef CONFIG_XEN_IA64_DOM0_VP - if (d == dom0) - return(gpfn); -#endif pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT, NULL); if (!pte) { panic("gmfn_to_mfn_foreign: bad gpfn. 
spinning...\n"); @@ -427,34 +418,12 @@ u64 translate_domain_pte(u64 pteval, u64 // FIXME address had better be pre-validated on insert mask = ~itir_mask(itir.itir); mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask); -#ifdef CONFIG_XEN_IA64_DOM0_VP - if (itir.ps > PAGE_SHIFT) { + + if (itir.ps > PAGE_SHIFT) itir.ps = PAGE_SHIFT; - } -#endif + *logps = itir.ps; -#ifndef CONFIG_XEN_IA64_DOM0_VP - if (d == dom0) { - if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { - /* - printk("translate_domain_pte: out-of-bounds dom0 mpaddr 0x%lx! itc=%lx...\n", - mpaddr, ia64_get_itc()); - */ - } - } - else if ((mpaddr >> PAGE_SHIFT) > d->max_pages) { - /* Address beyond the limit. However the grant table is - also beyond the limit. Display a message if not in the - grant table. */ - if (mpaddr >= IA64_GRANT_TABLE_PADDR - && mpaddr < (IA64_GRANT_TABLE_PADDR - + (ORDER_GRANT_FRAMES << PAGE_SHIFT))) - printf("translate_domain_pte: bad mpa=0x%lx (> 0x%lx)," - "vadr=0x%lx,pteval=0x%lx,itir=0x%lx\n", - mpaddr, (unsigned long)d->max_pages<<PAGE_SHIFT, - address, pteval, itir.itir); - } -#endif + pteval2 = lookup_domain_mpa(d, mpaddr, entry); /* Check access rights. */ @@ -525,14 +494,6 @@ unsigned long translate_domain_mpaddr(un { unsigned long pteval; -#ifndef CONFIG_XEN_IA64_DOM0_VP - if (current->domain == dom0) { - if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { - printk("translate_domain_mpaddr: out-of-bounds dom0 mpaddr 0x%lx! continuing...\n", - mpaddr); - } - } -#endif pteval = lookup_domain_mpa(current->domain, mpaddr, entry); return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK)); } @@ -644,7 +605,6 @@ lookup_noalloc_domain_pte(struct domain* return (volatile pte_t*)pte_offset_map(pmd, mpaddr); } -#ifdef CONFIG_XEN_IA64_DOM0_VP static volatile pte_t* lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr) { @@ -684,26 +644,12 @@ ____lookup_domain_mpa(struct domain *d, return GPFN_INV_MASK; return INVALID_MFN; } -#endif unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr, struct p2m_entry* entry) { - volatile pte_t *pte; - -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - if (d == dom0) { - pte_t pteval; - if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { - //printk("lookup_domain_mpa: bad dom0 mpaddr 0x%lx!\n",mpaddr); - //printk("lookup_domain_mpa: start=0x%lx,end=0x%lx!\n",dom0_start,dom0_start+dom0_size); - } - pteval = pfn_pte(mpaddr >> PAGE_SHIFT, - __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)); - return pte_val(pteval); - } -#endif - pte = lookup_noalloc_domain_pte(d, mpaddr); + volatile pte_t *pte = lookup_noalloc_domain_pte(d, mpaddr); + if (pte != NULL) { pte_t tmp_pte = *pte;// pte is volatile. copy the value. if (pte_present(tmp_pte)) { @@ -757,27 +703,11 @@ static struct page_info * static struct page_info * __assign_new_domain_page(struct domain *d, unsigned long mpaddr, pte_t* pte) { - struct page_info *p = NULL; + struct page_info *p; unsigned long maddr; int ret; BUG_ON(!pte_none(*pte)); - -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - if (d == dom0) { -#if 0 - if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { - /* FIXME: is it true ? - dom0 memory is not contiguous! 
*/ - panic("assign_new_domain_page: bad domain0 " - "mpaddr=%lx, start=%lx, end=%lx!\n", - mpaddr, dom0_start, dom0_start+dom0_size); - } -#endif - p = mfn_to_page((mpaddr >> PAGE_SHIFT)); - return p; - } -#endif p = alloc_domheap_page(d); if (unlikely(!p)) { @@ -812,25 +742,17 @@ struct page_info * struct page_info * assign_new_domain_page(struct domain *d, unsigned long mpaddr) { -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - pte_t dummy_pte = __pte(0); - return __assign_new_domain_page(d, mpaddr, &dummy_pte); -#else - struct page_info *p = NULL; - pte_t *pte; - - pte = __lookup_alloc_domain_pte(d, mpaddr); - if (pte_none(*pte)) - p = __assign_new_domain_page(d, mpaddr, pte); - - return p; -#endif + pte_t *pte = __lookup_alloc_domain_pte(d, mpaddr); + + if (!pte_none(*pte)) + return NULL; + + return __assign_new_domain_page(d, mpaddr, pte); } void assign_new_domain0_page(struct domain *d, unsigned long mpaddr) { -#ifndef CONFIG_DOMAIN0_CONTIGUOUS pte_t *pte; BUG_ON(d != dom0); @@ -841,7 +763,6 @@ assign_new_domain0_page(struct domain *d panic("%s: can't allocate page for dom0", __func__); } } -#endif } static unsigned long @@ -908,13 +829,27 @@ ioports_permit_access(struct domain *d, if (ret != 0) return ret; + /* Domain 0 doesn't virtualize IO ports space. */ + if (d == dom0) + return 0; + fp_offset = IO_SPACE_SPARSE_ENCODING(fp) & ~PAGE_MASK; lp_offset = PAGE_ALIGN(IO_SPACE_SPARSE_ENCODING(lp)); for (off = fp_offset; off <= lp_offset; off += PAGE_SIZE) __assign_domain_page(d, IO_PORTS_PADDR + off, - ia64_iobase + off, ASSIGN_nocache); - + __pa(ia64_iobase) + off, ASSIGN_nocache); + + return 0; +} + +static int +ioports_has_allowed(struct domain *d, unsigned long fp, unsigned long lp) +{ + unsigned long i; + for (i = fp; i < lp; i++) + if (rangeset_contains_singleton(d->arch.ioport_caps, i)) + return 1; return 0; } @@ -924,20 +859,34 @@ ioports_deny_access(struct domain *d, un int ret; struct mm_struct *mm = &d->arch.mm; unsigned long off; + unsigned long io_ports_base; unsigned long fp_offset; unsigned long lp_offset; ret = rangeset_remove_range(d->arch.ioport_caps, fp, lp); if (ret != 0) return ret; - - fp_offset = IO_SPACE_SPARSE_ENCODING(fp) & ~PAGE_MASK; + if (d == dom0) + io_ports_base = __pa(ia64_iobase); + else + io_ports_base = IO_PORTS_PADDR; + + fp_offset = IO_SPACE_SPARSE_ENCODING(fp) & PAGE_MASK; lp_offset = PAGE_ALIGN(IO_SPACE_SPARSE_ENCODING(lp)); - for (off = fp_offset; off <= lp_offset; off += PAGE_SIZE) { - unsigned long mpaddr = IO_PORTS_PADDR + off; + for (off = fp_offset; off < lp_offset; off += PAGE_SIZE) { + unsigned long mpaddr = io_ports_base + off; + unsigned long port; volatile pte_t *pte; pte_t old_pte; + + port = IO_SPACE_SPARSE_DECODING (off); + if (port < fp || port + IO_SPACE_SPARSE_PORTS_PER_PAGE - 1 > lp) { + /* Maybe this covers an allowed port. */ + if (ioports_has_allowed(d, port, + port + IO_SPACE_SPARSE_PORTS_PER_PAGE - 1)) + continue; + } pte = lookup_noalloc_domain_pte_none(d, mpaddr); BUG_ON(pte == NULL); @@ -950,7 +899,6 @@ ioports_deny_access(struct domain *d, un return 0; } -#ifdef CONFIG_XEN_IA64_DOM0_VP static void assign_domain_same_page(struct domain *d, unsigned long mpaddr, unsigned long size, @@ -1540,7 +1488,6 @@ domain_page_mapped(struct domain* d, uns return 1; return 0; } -#endif /* Flush cache of domain d. 
*/ void domain_cache_flush (struct domain *d, int sync_only) @@ -1558,15 +1505,6 @@ void domain_cache_flush (struct domain * else flush_func = &flush_dcache_range; -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - if (d == dom0) { - /* This is not fully correct (because of hole), but it should - be enough for now. */ - (*flush_func)(__va_ul (dom0_start), - __va_ul (dom0_start + dom0_size)); - return; - } -#endif for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { pud_t *pud; if (!pgd_present(*pgd)) @@ -1642,11 +1580,6 @@ void pgtable_quicklist_free(void *pgtabl free_xenheap_page(pgtable_entry); } -void cleanup_writable_pagetable(struct domain *d) -{ - return; -} - void put_page_type(struct page_info *page) { u32 nx, x, y = page->u.inuse.type_info; @@ -1754,22 +1687,6 @@ int get_page_type(struct page_info *page { if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) { - if ( current->domain == page_get_owner(page) ) - { - /* - * This ensures functions like set_gdt() see up-to-date - * type info without needing to clean up writable p.t. - * state on the fast path. - */ - LOCK_BIGLOCK(current->domain); - cleanup_writable_pagetable(current->domain); - y = page->u.inuse.type_info; - UNLOCK_BIGLOCK(current->domain); - /* Can we make progress now? */ - if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) || - ((y & PGT_count_mask) == 0) ) - goto again; - } if ( ((x & PGT_type_mask) != PGT_l2_page_table) || ((type & PGT_type_mask) != PGT_l1_page_table) ) MEM_LOG("Bad type (saw %08x != exp %08x) " diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/privop.c --- a/xen/arch/ia64/xen/privop.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/privop.c Sun Aug 13 12:00:38 2006 -0400 @@ -275,7 +275,7 @@ static IA64FAULT priv_mov_to_cr(VCPU *vc static IA64FAULT priv_mov_to_cr(VCPU *vcpu, INST64 inst) { UINT64 val = vcpu_get_gr(vcpu, inst.M32.r2); - privcnt.to_cr_cnt[inst.M32.cr3]++; + perfc_incra(mov_to_cr, inst.M32.cr3); switch (inst.M32.cr3) { case 0: return vcpu_set_dcr(vcpu,val); case 1: return vcpu_set_itm(vcpu,val); @@ -417,7 +417,7 @@ static IA64FAULT priv_mov_from_cr(VCPU * UINT64 val; IA64FAULT fault; - privcnt.from_cr_cnt[inst.M33.cr3]++; + perfc_incra(mov_from_cr, inst.M33.cr3); switch (inst.M33.cr3) { case 0: return cr_get(dcr); case 1: return cr_get(itm); @@ -563,15 +563,15 @@ priv_handle_op(VCPU *vcpu, REGS *regs, i #endif if (inst.M29.x3 != 0) break; if (inst.M30.x4 == 8 && inst.M30.x2 == 2) { - privcnt.mov_to_ar_imm++; + perfc_incrc(mov_to_ar_imm); return priv_mov_to_ar_imm(vcpu,inst); } if (inst.M44.x4 == 6) { - privcnt.ssm++; + perfc_incrc(ssm); return priv_ssm(vcpu,inst); } if (inst.M44.x4 == 7) { - privcnt.rsm++; + perfc_incrc(rsm); return priv_rsm(vcpu,inst); } break; @@ -580,8 +580,9 @@ priv_handle_op(VCPU *vcpu, REGS *regs, i x6 = inst.M29.x6; if (x6 == 0x2a) { if (privify_en && inst.M29.r2 > 63 && inst.M29.ar3 < 8) - privcnt.mov_from_ar++; // privified mov from kr - else privcnt.mov_to_ar_reg++; + perfc_incrc(mov_from_ar); // privified mov from kr + else + perfc_incrc(mov_to_ar_reg); return priv_mov_to_ar_reg(vcpu,inst); } if (inst.M29.x3 != 0) break; @@ -593,31 +594,33 @@ priv_handle_op(VCPU *vcpu, REGS *regs, i } } if (privify_en && x6 == 52 && inst.M28.r3 > 63) - privcnt.fc++; + perfc_incrc(fc); else if (privify_en && x6 == 16 && inst.M43.r3 > 63) - privcnt.cpuid++; - else privcnt.Mpriv_cnt[x6]++; + perfc_incrc(cpuid); + else + perfc_incra(misc_privop, x6); return (*pfunc)(vcpu,inst); break; case B: if (inst.generic.major != 0) break; if (inst.B8.x6 == 0x08) { IA64FAULT fault; - 
privcnt.rfi++; + perfc_incrc(rfi); fault = priv_rfi(vcpu,inst); if (fault == IA64_NO_FAULT) fault = IA64_RFI_IN_PROGRESS; return fault; } if (inst.B8.x6 == 0x0c) { - privcnt.bsw0++; + perfc_incrc(bsw0); return priv_bsw0(vcpu,inst); } if (inst.B8.x6 == 0x0d) { - privcnt.bsw1++; + perfc_incrc(bsw1); return priv_bsw1(vcpu,inst); } - if (inst.B8.x6 == 0x0) { // break instr for privified cover - privcnt.cover++; + if (inst.B8.x6 == 0x0) { + // break instr for privified cover + perfc_incrc(cover); return priv_cover(vcpu,inst); } break; @@ -625,19 +628,20 @@ priv_handle_op(VCPU *vcpu, REGS *regs, i if (inst.generic.major != 0) break; #if 0 if (inst.I26.x6 == 0 && inst.I26.x3 == 0) { - privcnt.cover++; + perfc_incrc(cover); return priv_cover(vcpu,inst); } #endif if (inst.I26.x3 != 0) break; // I26.x3 == I27.x3 if (inst.I26.x6 == 0x2a) { if (privify_en && inst.I26.r2 > 63 && inst.I26.ar3 < 8) - privcnt.mov_from_ar++; // privified mov from kr - else privcnt.mov_to_ar_reg++; + perfc_incrc(mov_from_ar); // privified mov from kr + else + perfc_incrc(mov_to_ar_reg); return priv_mov_to_ar_reg(vcpu,inst); } if (inst.I27.x6 == 0x0a) { - privcnt.mov_to_ar_imm++; + perfc_incrc(mov_to_ar_imm); return priv_mov_to_ar_imm(vcpu,inst); } break; @@ -705,7 +709,7 @@ ia64_hyperprivop(unsigned long iim, REGS iim, regs->cr_iip); return 1; } - slow_hyperpriv_cnt[iim]++; + perfc_incra(slow_hyperprivop, iim); switch(iim) { case HYPERPRIVOP_RFI: (void)vcpu_rfi(v); diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/privop_stat.c --- a/xen/arch/ia64/xen/privop_stat.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/privop_stat.c Sun Aug 13 12:00:38 2006 -0400 @@ -2,26 +2,6 @@ #include <asm/vhpt.h> #include <xen/lib.h> #include <asm/uaccess.h> - -unsigned long dtlb_translate_count = 0; -unsigned long tr_translate_count = 0; -unsigned long phys_translate_count = 0; -unsigned long vhpt_translate_count = 0; -unsigned long fast_vhpt_translate_count = 0; -unsigned long recover_to_page_fault_count = 0; -unsigned long recover_to_break_fault_count = 0; -unsigned long idle_when_pending = 0; -unsigned long pal_halt_light_count = 0; -unsigned long context_switch_count = 0; -unsigned long lazy_cover_count = 0; - -unsigned long slow_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 }; -unsigned long fast_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 }; - -unsigned long slow_reflect_count[0x80] = { 0 }; -unsigned long fast_reflect_count[0x80] = { 0 }; - -struct privop_counters privcnt; #ifdef PRIVOP_ADDR_COUNT #define PRIVOP_COUNT_NINSTS 2 @@ -97,6 +77,7 @@ Privileged operation instrumentation rou Privileged operation instrumentation routines **************************************************************************/ +#if 0 static const char * const Mpriv_str[64] = { "mov_to_rr", "mov_to_dbr", "mov_to_ibr", "mov_to_pkr", "mov_to_pmc", "mov_to_pmd", "<0x06>", "<0x07>", @@ -132,217 +113,12 @@ static const char * const cr_str[128] = RS,RS,RS,RS,RS,RS,RS,RS }; -// FIXME: should use snprintf to ensure no buffer overflow -static int dump_privop_counts(char *buf) -{ - int i, j; - unsigned long sum = 0; - char *s = buf; - - // this is ugly and should probably produce sorted output - // but it will have to do for now - sum += privcnt.mov_to_ar_imm; sum += privcnt.mov_to_ar_reg; - sum += privcnt.ssm; sum += privcnt.rsm; - sum += privcnt.rfi; sum += privcnt.bsw0; - sum += privcnt.bsw1; sum += privcnt.cover; - for (i=0; i < 64; i++) - sum += privcnt.Mpriv_cnt[i]; - s += sprintf(s,"Privop statistics: (Total privops: %ld)\n",sum); - if (privcnt.mov_to_ar_imm) - 
s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.mov_to_ar_imm, - "mov_to_ar_imm", (privcnt.mov_to_ar_imm*100L)/sum); - if (privcnt.mov_to_ar_reg) - s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.mov_to_ar_reg, - "mov_to_ar_reg", (privcnt.mov_to_ar_reg*100L)/sum); - if (privcnt.mov_from_ar) - s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.mov_from_ar, - "privified-mov_from_ar", (privcnt.mov_from_ar*100L)/sum); - if (privcnt.ssm) - s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.ssm, - "ssm", (privcnt.ssm*100L)/sum); - if (privcnt.rsm) - s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.rsm, - "rsm", (privcnt.rsm*100L)/sum); - if (privcnt.rfi) - s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.rfi, - "rfi", (privcnt.rfi*100L)/sum); - if (privcnt.bsw0) - s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.bsw0, - "bsw0", (privcnt.bsw0*100L)/sum); - if (privcnt.bsw1) - s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.bsw1, - "bsw1", (privcnt.bsw1*100L)/sum); - if (privcnt.cover) - s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.cover, - "cover", (privcnt.cover*100L)/sum); - if (privcnt.fc) - s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.fc, - "privified-fc", (privcnt.fc*100L)/sum); - if (privcnt.cpuid) - s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.cpuid, - "privified-getcpuid", (privcnt.cpuid*100L)/sum); - for (i=0; i < 64; i++) if (privcnt.Mpriv_cnt[i]) { - if (!Mpriv_str[i]) s += sprintf(s,"PRIVSTRING NULL!!\n"); - else s += sprintf(s,"%10ld %s [%ld%%]\n", privcnt.Mpriv_cnt[i], - Mpriv_str[i], (privcnt.Mpriv_cnt[i]*100L)/sum); - if (i == 0x24) { // mov from CR - s += sprintf(s," ["); - for (j=0; j < 128; j++) if (privcnt.from_cr_cnt[j]) { - if (!cr_str[j]) - s += sprintf(s,"PRIVSTRING NULL!!\n"); - else - s += sprintf(s,"%s(%ld),",cr_str[j], - privcnt.from_cr_cnt[j]); - } - s += sprintf(s,"]\n"); - } - else if (i == 0x2c) { // mov to CR - s += sprintf(s," ["); - for (j=0; j < 128; j++) if (privcnt.to_cr_cnt[j]) { - if (!cr_str[j]) - s += sprintf(s,"PRIVSTRING NULL!!\n"); - else - s += sprintf(s,"%s(%ld),",cr_str[j], - privcnt.to_cr_cnt[j]); - } - s += sprintf(s,"]\n"); - } - } - return s - buf; -} - -static int zero_privop_counts(char *buf) -{ - int i, j; - char *s = buf; - - // this is ugly and should probably produce sorted output - // but it will have to do for now - privcnt.mov_to_ar_imm = 0; - privcnt.mov_to_ar_reg = 0; - privcnt.mov_from_ar = 0; - privcnt.ssm = 0; privcnt.rsm = 0; - privcnt.rfi = 0; privcnt.bsw0 = 0; - privcnt.bsw1 = 0; privcnt.cover = 0; - privcnt.fc = 0; privcnt.cpuid = 0; - for (i=0; i < 64; i++) - privcnt.Mpriv_cnt[i] = 0; - for (j=0; j < 128; j++) - privcnt.from_cr_cnt[j] = 0; - for (j=0; j < 128; j++) - privcnt.to_cr_cnt[j] = 0; - s += sprintf(s,"All privop statistics zeroed\n"); - return s - buf; -} - -static int dump_misc_stats(char *buf) -{ - char *s = buf; - s += sprintf(s,"Virtual TR translations: %ld\n",tr_translate_count); - s += sprintf(s,"Virtual VHPT slow translations: %ld\n",vhpt_translate_count); - s += sprintf(s,"Virtual VHPT fast translations: %ld\n",fast_vhpt_translate_count); - s += sprintf(s,"Virtual DTLB translations: %ld\n",dtlb_translate_count); - s += sprintf(s,"Physical translations: %ld\n",phys_translate_count); - s += sprintf(s,"Recoveries to page fault: %ld\n",recover_to_page_fault_count); - s += sprintf(s,"Recoveries to break fault: %ld\n",recover_to_break_fault_count); - s += sprintf(s,"Idle when pending: %ld\n",idle_when_pending); - s += sprintf(s,"PAL_HALT_LIGHT (no pending): %ld\n",pal_halt_light_count); - s += sprintf(s,"context switches: 
%ld\n",context_switch_count); - s += sprintf(s,"Lazy covers: %ld\n",lazy_cover_count); - return s - buf; -} - -static void zero_misc_stats(void) -{ - dtlb_translate_count = 0; - tr_translate_count = 0; - phys_translate_count = 0; - vhpt_translate_count = 0; - fast_vhpt_translate_count = 0; - recover_to_page_fault_count = 0; - recover_to_break_fault_count = 0; - lazy_cover_count = 0; - pal_halt_light_count = 0; - idle_when_pending = 0; - context_switch_count = 0; -} - static const char * const hyperpriv_str[HYPERPRIVOP_MAX+1] = { 0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i", "=ivr", "=tpr", "tpr=", "eoi", "itm=", "thash", "ptc.ga", "itr.d", "=rr", "rr=", "kr=", "fc", "=cpuid", "=pmd", "=ar.eflg", "ar.eflg=" }; - - -static int dump_hyperprivop_counts(char *buf) -{ - int i; - char *s = buf; - unsigned long total = 0; - for (i = 1; i <= HYPERPRIVOP_MAX; i++) - total += slow_hyperpriv_cnt[i]; - s += sprintf(s,"Slow hyperprivops (total %ld):\n",total); - for (i = 1; i <= HYPERPRIVOP_MAX; i++) - if (slow_hyperpriv_cnt[i]) - s += sprintf(s,"%10ld %s\n", - slow_hyperpriv_cnt[i], hyperpriv_str[i]); - total = 0; - for (i = 1; i <= HYPERPRIVOP_MAX; i++) - total += fast_hyperpriv_cnt[i]; - s += sprintf(s,"Fast hyperprivops (total %ld):\n",total); - for (i = 1; i <= HYPERPRIVOP_MAX; i++) - if (fast_hyperpriv_cnt[i]) - s += sprintf(s,"%10ld %s\n", - fast_hyperpriv_cnt[i], hyperpriv_str[i]); - return s - buf; -} - -static void zero_hyperprivop_counts(void) -{ - int i; - for (i = 0; i <= HYPERPRIVOP_MAX; i++) - slow_hyperpriv_cnt[i] = 0; - for (i = 0; i <= HYPERPRIVOP_MAX; i++) - fast_hyperpriv_cnt[i] = 0; -} - -static void zero_reflect_counts(void) -{ - int i; - for (i=0; i < 0x80; i++) - slow_reflect_count[i] = 0; - for (i=0; i < 0x80; i++) - fast_reflect_count[i] = 0; -} - -static int dump_reflect_counts(char *buf) -{ - int i,j,cnt; - char *s = buf; - - s += sprintf(s,"Slow reflections by vector:\n"); - for (i = 0, j = 0; i < 0x80; i++) { - if ( (cnt = slow_reflect_count[i]) != 0 ) { - s += sprintf(s,"0x%02x00:%10d, ",i,cnt); - if ((j++ & 3) == 3) - s += sprintf(s,"\n"); - } - } - if (j & 3) - s += sprintf(s,"\n"); - s += sprintf(s,"Fast reflections by vector:\n"); - for (i = 0, j = 0; i < 0x80; i++) { - if ( (cnt = fast_reflect_count[i]) != 0 ) { - s += sprintf(s,"0x%02x00:%10d, ",i,cnt); - if ((j++ & 3) == 3) - s += sprintf(s,"\n"); - } - } - if (j & 3) - s += sprintf(s,"\n"); - return s - buf; -} - +#endif #define TMPBUFLEN 8*1024 int dump_privop_counts_to_user(char __user *ubuf, int len) @@ -353,14 +129,11 @@ int dump_privop_counts_to_user(char __us if (len < TMPBUFLEN) return -1; - n = dump_privop_counts(buf); - n += dump_hyperprivop_counts(buf + n); - n += dump_reflect_counts(buf + n); + n = 0; #ifdef PRIVOP_ADDR_COUNT n += dump_privop_addrs(buf + n); #endif n += dump_vhpt_stats(buf + n); - n += dump_misc_stats(buf + n); if (__copy_to_user(ubuf,buf,n)) return -1; return n; @@ -368,22 +141,8 @@ int dump_privop_counts_to_user(char __us int zero_privop_counts_to_user(char __user *ubuf, int len) { - char buf[TMPBUFLEN]; - int n; - - if (len < TMPBUFLEN) - return -1; - - n = zero_privop_counts(buf); - - zero_hyperprivop_counts(); #ifdef PRIVOP_ADDR_COUNT zero_privop_addrs(); #endif - zero_vhpt_stats(); - zero_misc_stats(); - zero_reflect_counts(); - if (__copy_to_user(ubuf,buf,n)) - return -1; - return n; + return 0; } diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/vcpu.c --- a/xen/arch/ia64/xen/vcpu.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/vcpu.c Sun Aug 13 
12:00:38 2006 -0400 @@ -763,12 +763,8 @@ UINT64 vcpu_deliverable_timer(VCPU *vcpu IA64FAULT vcpu_get_lid(VCPU *vcpu, UINT64 *pval) { - /* Use real LID for domain0 until vIOSAPIC is present. - Use EID=0, ID=vcpu_id for domU. */ - if (vcpu->domain == dom0) - *pval = ia64_getreg(_IA64_REG_CR_LID); - else - *pval = vcpu->vcpu_id << 24; + /* Use EID=0, ID=vcpu_id. */ + *pval = vcpu->vcpu_id << 24; return IA64_NO_FAULT; } @@ -1500,7 +1496,7 @@ IA64FAULT vcpu_translate(VCPU *vcpu, UIN *pteval = (address & _PAGE_PPN_MASK) | __DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX; *itir = PAGE_SHIFT << 2; - phys_translate_count++; + perfc_incrc(phys_translate); return IA64_NO_FAULT; } } @@ -1521,7 +1517,7 @@ IA64FAULT vcpu_translate(VCPU *vcpu, UIN if (trp != NULL) { *pteval = trp->pte.val; *itir = trp->itir; - tr_translate_count++; + perfc_incrc(tr_translate); return IA64_NO_FAULT; } } @@ -1531,7 +1527,7 @@ IA64FAULT vcpu_translate(VCPU *vcpu, UIN if (trp != NULL) { *pteval = trp->pte.val; *itir = trp->itir; - tr_translate_count++; + perfc_incrc(tr_translate); return IA64_NO_FAULT; } } @@ -1544,7 +1540,7 @@ IA64FAULT vcpu_translate(VCPU *vcpu, UIN && vcpu_match_tr_entry_no_p(trp,address,rid)) { *pteval = pte.val; *itir = trp->itir; - dtlb_translate_count++; + perfc_incrc(dtlb_translate); return IA64_USE_TLB; } @@ -1582,7 +1578,7 @@ IA64FAULT vcpu_translate(VCPU *vcpu, UIN /* found mapping in guest VHPT! */ *itir = rr & RR_PS_MASK; *pteval = pte.val; - vhpt_translate_count++; + perfc_incrc(vhpt_translate); return IA64_NO_FAULT; } @@ -2012,9 +2008,7 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 panic_domain (NULL, "vcpu_itc_no_srlz: domain trying to use " "smaller page size!\n"); -#ifdef CONFIG_XEN_IA64_DOM0_VP BUG_ON(logps > PAGE_SHIFT); -#endif psr = ia64_clear_ic(); ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings ia64_set_psr(psr); diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/vhpt.c --- a/xen/arch/ia64/xen/vhpt.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/vhpt.c Sun Aug 13 12:00:38 2006 -0400 @@ -261,11 +261,6 @@ void flush_tlb_mask(cpumask_t mask) (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1); } -void zero_vhpt_stats(void) -{ - return; -} - int dump_vhpt_stats(char *buf) { int i, cpu; diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/ia64/xen/xensetup.c Sun Aug 13 12:00:38 2006 -0400 @@ -388,13 +388,6 @@ void start_kernel(void) max_page = 0; efi_memmap_walk(find_max_pfn, &max_page); printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page); -#ifndef CONFIG_XEN_IA64_DOM0_VP - /* this is a bad hack. 
see dom_fw.c creation of EFI map for dom0 */ - max_page = (GRANULEROUNDDOWN(max_page << PAGE_SHIFT) - - IA64_GRANULE_SIZE) >> PAGE_SHIFT; - printf("find_memory: last granule reserved for dom0; xen max_page=%lx\n", - max_page); -#endif efi_print(); heap_start = memguard_init(ia64_imva(&_end)); @@ -422,6 +415,8 @@ void start_kernel(void) printk("Xen heap: %luMB (%lukB)\n", (xenheap_phys_end-__pa(heap_start)) >> 20, (xenheap_phys_end-__pa(heap_start)) >> 10); + + late_setup_arch(&cmdline); scheduler_init(); idle_vcpu[0] = (struct vcpu*) ia64_r13; @@ -429,7 +424,6 @@ void start_kernel(void) if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) ) BUG(); - late_setup_arch(&cmdline); alloc_dom_xen_and_dom_io(); setup_per_cpu_areas(); mem_init(); @@ -532,8 +526,10 @@ printk("num_online_cpus=%d, max_cpus=%d\ init_trace_bufs(); - if (opt_xencons) + if (opt_xencons) { console_endboot(); + serial_endboot(); + } domain0_ready = 1; diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/powerpc/setup.c --- a/xen/arch/powerpc/setup.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/powerpc/setup.c Sun Aug 13 12:00:38 2006 -0400 @@ -55,7 +55,7 @@ boolean_param("earlygdb", opt_earlygdb); boolean_param("earlygdb", opt_earlygdb); u32 tlbflush_clock = 1U; -u32 tlbflush_time[NR_CPUS]; +DEFINE_PER_CPU(u32, tlbflush_time); unsigned int watchdog_on; unsigned long wait_init_idle; diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/domain.c Sun Aug 13 12:00:38 2006 -0400 @@ -154,7 +154,7 @@ int arch_domain_create(struct domain *d) int arch_domain_create(struct domain *d) { l1_pgentry_t gdt_l1e; - int vcpuid, pdpt_order, rc; + int vcpuid, pdpt_order; #ifdef __x86_64__ int i; #endif @@ -213,9 +213,6 @@ int arch_domain_create(struct domain *d) goto fail_nomem; if ( (d->shared_info = alloc_xenheap_page()) == NULL ) - goto fail_nomem; - - if ( (rc = ptwr_init(d)) != 0 ) goto fail_nomem; memset(d->shared_info, 0, PAGE_SIZE); @@ -797,7 +794,7 @@ unsigned long hypercall_create_continuat unsigned long hypercall_create_continuation( unsigned int op, const char *format, ...) { - struct mc_state *mcs = &mc_state[smp_processor_id()]; + struct mc_state *mcs = &this_cpu(mc_state); struct cpu_user_regs *regs; const char *p = format; unsigned long arg; @@ -926,8 +923,6 @@ void domain_relinquish_resources(struct unsigned long pfn; BUG_ON(!cpus_empty(d->domain_dirty_cpumask)); - - ptwr_destroy(d); /* Drop the in-use references to page-table bases. */ for_each_vcpu ( d, v ) diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/flushtlb.c --- a/xen/arch/x86/flushtlb.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/flushtlb.c Sun Aug 13 12:00:38 2006 -0400 @@ -20,7 +20,7 @@ #endif u32 tlbflush_clock = 1U; -u32 tlbflush_time[NR_CPUS]; +DEFINE_PER_CPU(u32, tlbflush_time); void write_cr3(unsigned long cr3) { @@ -71,7 +71,7 @@ void write_cr3(unsigned long cr3) * case, so really we are being ultra paranoid. 
*/ - tlbflush_time[smp_processor_id()] = t2; + this_cpu(tlbflush_time) = t2; local_irq_restore(flags); } diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/hvm.c Sun Aug 13 12:00:38 2006 -0400 @@ -29,6 +29,7 @@ #include <xen/domain_page.h> #include <xen/hypercall.h> #include <xen/guest_access.h> +#include <xen/event.h> #include <asm/current.h> #include <asm/io.h> #include <asm/shadow.h> @@ -46,7 +47,6 @@ #endif #include <public/sched.h> #include <public/hvm/ioreq.h> -#include <public/hvm/hvm_info_table.h> #include <public/version.h> #include <public/memory.h> @@ -133,15 +133,28 @@ static void e820_map_io_shared_callback( } } -void hvm_map_io_shared_page(struct vcpu *v) -{ - unsigned long mfn = INVALID_MFN; +static void e820_map_buffered_io_callback(struct domain *d, + struct e820entry *e, + void *data) +{ + unsigned long *mfn = data; + if ( e->type == E820_BUFFERED_IO ) { + ASSERT(*mfn == INVALID_MFN); + *mfn = gmfn_to_mfn(d, e->addr >> PAGE_SHIFT); + } +} + +void hvm_map_io_shared_pages(struct vcpu *v) +{ + unsigned long mfn; void *p; struct domain *d = v->domain; - if ( d->arch.hvm_domain.shared_page_va ) + if ( d->arch.hvm_domain.shared_page_va || + d->arch.hvm_domain.buffered_io_va ) return; + mfn = INVALID_MFN; e820_foreach(d, e820_map_io_shared_callback, &mfn); if ( mfn == INVALID_MFN ) @@ -158,7 +171,38 @@ void hvm_map_io_shared_page(struct vcpu } d->arch.hvm_domain.shared_page_va = (unsigned long)p; -} + + mfn = INVALID_MFN; + e820_foreach(d, e820_map_buffered_io_callback, &mfn); + if ( mfn != INVALID_MFN ) { + p = map_domain_page_global(mfn); + if ( p ) + d->arch.hvm_domain.buffered_io_va = (unsigned long)p; + } +} + +void hvm_create_event_channels(struct vcpu *v) +{ + vcpu_iodata_t *p; + struct vcpu *o; + + if ( v->vcpu_id == 0 ) { + /* Ugly: create event channels for every vcpu when vcpu 0 + starts, so that they're available for ioemu to bind to. */ + for_each_vcpu(v->domain, o) { + p = get_vio(v->domain, o->vcpu_id); + o->arch.hvm_vcpu.xen_port = p->vp_eport = + alloc_unbound_xen_event_channel(o, 0); + DPRINTK("Allocated port %d for hvm.\n", o->arch.hvm_vcpu.xen_port); + } + } +} + +void hvm_release_assist_channel(struct vcpu *v) +{ + free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port); +} + void hvm_setup_platform(struct domain* d) { @@ -175,7 +219,6 @@ void hvm_setup_platform(struct domain* d } hvm_zap_iommu_pages(d); - hvm_map_io_shared_page(v); platform = &d->arch.hvm_domain; pic_init(&platform->vpic, pic_irq_request, &platform->interrupt_request); @@ -186,6 +229,8 @@ void hvm_setup_platform(struct domain* d spin_lock_init(&d->arch.hvm_domain.round_robin_lock); hvm_vioapic_init(d); } + + spin_lock_init(&d->arch.hvm_domain.buffered_io_lock); init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, v->processor); @@ -523,7 +568,7 @@ long do_hvm_op(unsigned long op, XEN_GUE else if ( IS_PRIV(current->domain) ) { d = find_domain_by_id(a.domid); - if ( !d ) + if ( d == NULL ) return -ESRCH; } else @@ -533,22 +578,24 @@ long do_hvm_op(unsigned long op, XEN_GUE if ( op == HVMOP_set_param ) { + d->arch.hvm_domain.params[a.index] = a.value; rc = 0; - d->arch.hvm_domain.params[a.index] = a.value; } else { - rc = d->arch.hvm_domain.params[a.index]; + a.value = d->arch.hvm_domain.params[a.index]; + rc = copy_to_guest(arg, &a, 1) ? 
-EFAULT : 0; } put_domain(d); - return rc; + break; } default: { DPRINTK("Bad HVM op %ld.\n", op); rc = -ENOSYS; + break; } } diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/intercept.c Sun Aug 13 12:00:38 2006 -0400 @@ -36,10 +36,24 @@ extern struct hvm_mmio_handler vioapic_m #define HVM_MMIO_HANDLER_NR 2 -struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] = +static struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] = { &vlapic_mmio_handler, &vioapic_mmio_handler +}; + +struct hvm_buffered_io_range { + unsigned long start_addr; + unsigned long length; +}; + +#define HVM_BUFFERED_IO_RANGE_NR 1 + +static struct hvm_buffered_io_range buffered_stdvga_range = {0xA0000, 0x20000}; +static struct hvm_buffered_io_range +*hvm_buffered_io_ranges[HVM_BUFFERED_IO_RANGE_NR] = +{ + &buffered_stdvga_range }; static inline void hvm_mmio_access(struct vcpu *v, @@ -140,6 +154,56 @@ static inline void hvm_mmio_access(struc } } +int hvm_buffered_io_intercept(ioreq_t *p) +{ + struct vcpu *v = current; + spinlock_t *buffered_io_lock; + buffered_iopage_t *buffered_iopage = + (buffered_iopage_t *)(v->domain->arch.hvm_domain.buffered_io_va); + unsigned long tmp_write_pointer = 0; + int i; + + /* ignore READ ioreq_t! */ + if ( p->dir == IOREQ_READ ) + return 0; + + for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) { + if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr && + p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr + + hvm_buffered_io_ranges[i]->length ) + break; + } + + if ( i == HVM_BUFFERED_IO_RANGE_NR ) + return 0; + + buffered_io_lock = &v->domain->arch.hvm_domain.buffered_io_lock; + spin_lock(buffered_io_lock); + + if ( buffered_iopage->write_pointer - buffered_iopage->read_pointer == + (unsigned long)IOREQ_BUFFER_SLOT_NUM ) { + /* the queue is full. + * send the iopacket through the normal path. + * NOTE: The arithimetic operation could handle the situation for + * write_pointer overflow. + */ + spin_unlock(buffered_io_lock); + return 0; + } + + tmp_write_pointer = buffered_iopage->write_pointer % IOREQ_BUFFER_SLOT_NUM; + + memcpy(&buffered_iopage->ioreq[tmp_write_pointer], p, sizeof(ioreq_t)); + + /*make the ioreq_t visible before write_pointer*/ + wmb(); + buffered_iopage->write_pointer++; + + spin_unlock(buffered_io_lock); + + return 1; +} + int hvm_mmio_intercept(ioreq_t *p) { struct vcpu *v = current; @@ -211,7 +275,7 @@ void hlt_timer_fn(void *data) { struct vcpu *v = data; - evtchn_set_pending(v, iopacket_port(v)); + hvm_prod_vcpu(v); } static __inline__ void missed_ticks(struct periodic_time *pt) diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/io.c Sun Aug 13 12:00:38 2006 -0400 @@ -687,84 +687,17 @@ void hvm_io_assist(struct vcpu *v) p = &vio->vp_ioreq; - /* clear IO wait HVM flag */ - if ( test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) { - if ( p->state == STATE_IORESP_READY ) { - p->state = STATE_INVALID; - clear_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags); - - if ( p->type == IOREQ_TYPE_PIO ) - hvm_pio_assist(regs, p, io_opp); - else - hvm_mmio_assist(regs, p, io_opp); - - /* Copy register changes back into current guest state. 
*/ - hvm_load_cpu_guest_regs(v, regs); - memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES); - } - /* else an interrupt send event raced us */ - } -} - -/* - * On exit from hvm_wait_io, we're guaranteed not to be waiting on - * I/O response from the device model. - */ -void hvm_wait_io(void) -{ - struct vcpu *v = current; - struct domain *d = v->domain; - int port = iopacket_port(v); - - for ( ; ; ) - { - /* Clear master flag, selector flag, event flag each in turn. */ - v->vcpu_info->evtchn_upcall_pending = 0; - clear_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - smp_mb__after_clear_bit(); - if ( test_and_clear_bit(port, &d->shared_info->evtchn_pending[0]) ) - hvm_io_assist(v); - - /* Need to wait for I/O responses? */ - if ( !test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) - break; - - do_sched_op_compat(SCHEDOP_block, 0); - } - - /* - * Re-set the selector and master flags in case any other notifications - * are pending. - */ - if ( d->shared_info->evtchn_pending[port/BITS_PER_LONG] ) - set_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - if ( v->vcpu_info->evtchn_pending_sel ) - v->vcpu_info->evtchn_upcall_pending = 1; -} - -void hvm_safe_block(void) -{ - struct vcpu *v = current; - struct domain *d = v->domain; - int port = iopacket_port(v); - - for ( ; ; ) - { - /* Clear master flag & selector flag so we will wake from block. */ - v->vcpu_info->evtchn_upcall_pending = 0; - clear_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - smp_mb__after_clear_bit(); - - /* Event pending already? */ - if ( test_bit(port, &d->shared_info->evtchn_pending[0]) ) - break; - - do_sched_op_compat(SCHEDOP_block, 0); - } - - /* Reflect pending event in selector and master flags. */ - set_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel); - v->vcpu_info->evtchn_upcall_pending = 1; + if ( p->state == STATE_IORESP_READY ) { + p->state = STATE_INVALID; + if ( p->type == IOREQ_TYPE_PIO ) + hvm_pio_assist(regs, p, io_opp); + else + hvm_mmio_assist(regs, p, io_opp); + + /* Copy register changes back into current guest state. */ + hvm_load_cpu_guest_regs(v, regs); + memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES); + } } /* diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/platform.c Sun Aug 13 12:00:38 2006 -0400 @@ -669,6 +669,30 @@ int inst_copy_from_guest(unsigned char * return inst_len; } +static void hvm_send_assist_req(struct vcpu *v) +{ + ioreq_t *p; + + p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; + if ( unlikely(p->state != STATE_INVALID) ) { + /* This indicates a bug in the device model. Crash the + domain. 
*/ + printf("Device model set bad IO state %d.\n", p->state); + domain_crash(v->domain); + return; + } + wmb(); + p->state = STATE_IOREQ_READY; + notify_via_xen_event_channel(v->arch.hvm_vcpu.xen_port); +} + + +/* Wake up a vcpu whihc is waiting for interrupts to come in */ +void hvm_prod_vcpu(struct vcpu *v) +{ + vcpu_unblock(v); +} + void send_pio_req(struct cpu_user_regs *regs, unsigned long port, unsigned long count, int size, long value, int dir, int pvalid) { @@ -682,13 +706,10 @@ void send_pio_req(struct cpu_user_regs * domain_crash_synchronous(); } - if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)) { - printf("HVM I/O has not yet completed\n"); - domain_crash_synchronous(); - } - set_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags); - p = &vio->vp_ioreq; + if ( p->state != STATE_INVALID ) + printf("WARNING: send pio with something already pending (%d)?\n", + p->state); p->dir = dir; p->pdata_valid = pvalid; @@ -714,10 +735,7 @@ void send_pio_req(struct cpu_user_regs * return; } - p->state = STATE_IOREQ_READY; - - evtchn_send(iopacket_port(v)); - hvm_wait_io(); + hvm_send_assist_req(v); } void send_mmio_req( @@ -739,12 +757,9 @@ void send_mmio_req( p = &vio->vp_ioreq; - if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)) { - printf("HVM I/O has not yet completed\n"); - domain_crash_synchronous(); - } - - set_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags); + if ( p->state != STATE_INVALID ) + printf("WARNING: send mmio with something already pending (%d)?\n", + p->state); p->dir = dir; p->pdata_valid = pvalid; @@ -764,16 +779,13 @@ void send_mmio_req( } else p->u.data = value; - if (hvm_mmio_intercept(p)){ + if ( hvm_mmio_intercept(p) || hvm_buffered_io_intercept(p) ) { p->state = STATE_IORESP_READY; hvm_io_assist(v); return; } - p->state = STATE_IOREQ_READY; - - evtchn_send(iopacket_port(v)); - hvm_wait_io(); + hvm_send_assist_req(v); } static void mmio_operands(int type, unsigned long gpa, struct instruction *inst, diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/svm/intr.c Sun Aug 13 12:00:38 2006 -0400 @@ -112,6 +112,7 @@ asmlinkage void svm_intr_assist(void) struct hvm_domain *plat=&v->domain->arch.hvm_domain; struct periodic_time *pt = &plat->pl_time.periodic_tm; struct hvm_virpic *pic= &plat->vpic; + int callback_irq; int intr_type = APIC_DM_EXTINT; int intr_vector = -1; int re_injecting = 0; @@ -156,11 +157,21 @@ asmlinkage void svm_intr_assist(void) if ( v->vcpu_id == 0 ) hvm_pic_assist(v); - /* Before we deal with PIT interrupts, let's check - for interrupts set by the device model. + callback_irq = v->domain->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ]; + + /* Before we deal with PIT interrupts, let's check for + interrupts set by the device model or paravirtualised event + channel interrupts. 
*/ if ( cpu_has_pending_irq(v) ) { intr_vector = cpu_get_interrupt(v, &intr_type); + } + else if ( callback_irq != 0 && local_events_need_delivery() ) { + /*inject para-device call back irq*/ + v->vcpu_info->evtchn_upcall_mask = 1; + pic_set_irq(pic, callback_irq, 0); + pic_set_irq(pic, callback_irq, 1); + intr_vector = callback_irq; } else if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) { pic_set_irq(pic, pt->irq, 0); diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/svm/svm.c Sun Aug 13 12:00:38 2006 -0400 @@ -25,6 +25,7 @@ #include <xen/sched.h> #include <xen/irq.h> #include <xen/softirq.h> +#include <xen/hypercall.h> #include <asm/current.h> #include <asm/io.h> #include <asm/shadow.h> @@ -808,6 +809,9 @@ static void svm_relinquish_guest_resourc if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( (void *)d->arch.hvm_domain.shared_page_va); + + if ( d->arch.hvm_domain.buffered_io_va ) + unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va); shadow_direct_map_clean(d); } @@ -2121,7 +2125,7 @@ static inline void svm_vmexit_do_hlt(str next_wakeup = next_pit; if ( next_wakeup != - 1 ) set_timer(¤t->arch.hvm_svm.hlt_timer, next_wakeup); - hvm_safe_block(); + do_sched_op_compat(SCHEDOP_block, 0); } diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/svm/vmcb.c Sun Aug 13 12:00:38 2006 -0400 @@ -370,18 +370,6 @@ void svm_do_launch(struct vcpu *v) if (v->vcpu_id == 0) hvm_setup_platform(v->domain); - if ( evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0 ) - { - printk("HVM domain bind port %d to vcpu %d failed!\n", - iopacket_port(v), v->vcpu_id); - domain_crash_synchronous(); - } - - HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x", iopacket_port(v)); - - clear_bit(iopacket_port(v), - &v->domain->shared_info->evtchn_mask[0]); - if (hvm_apic_support(v->domain)) vlapic_init(v); init_timer(&v->arch.hvm_svm.hlt_timer, @@ -439,10 +427,12 @@ void set_hsa_to_guest( struct arch_svm_s /* * Resume the guest. */ +/* XXX svm_do_resume and vmx_do_resume are remarkably similar; could + they be unified? 
*/ void svm_do_resume(struct vcpu *v) { - struct domain *d = v->domain; - struct periodic_time *pt = &d->arch.hvm_domain.pl_time.periodic_tm; + struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm; + ioreq_t *p; svm_stts(v); @@ -455,12 +445,16 @@ void svm_do_resume(struct vcpu *v) pickup_deactive_ticks(pt); } - if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || - test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) - hvm_wait_io(); - - /* We can't resume the guest if we're waiting on I/O */ - ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)); + p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; + wait_on_xen_event_channel(v->arch.hvm.xen_port, + p->state != STATE_IOREQ_READY && + p->state != STATE_IOREQ_INPROCESS); + if ( p->state == STATE_IORESP_READY ) + hvm_io_assist(v); + if ( p->state != STATE_INVALID ) { + printf("Weird HVM iorequest state %d.\n", p->state); + domain_crash(v->domain); + } } void svm_launch_fail(unsigned long eflags) diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/svm/x86_32/exits.S --- a/xen/arch/x86/hvm/svm/x86_32/exits.S Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/svm/x86_32/exits.S Sun Aug 13 12:00:38 2006 -0400 @@ -132,6 +132,9 @@ ENTRY(svm_asm_do_resume) ENTRY(svm_asm_do_resume) svm_test_all_events: GET_CURRENT(%ebx) + pushl %ebx + call svm_do_resume + addl $4, %esp /*test_all_events:*/ xorl %ecx,%ecx notl %ecx diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/svm/x86_64/exits.S --- a/xen/arch/x86/hvm/svm/x86_64/exits.S Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/svm/x86_64/exits.S Sun Aug 13 12:00:38 2006 -0400 @@ -147,6 +147,8 @@ ENTRY(svm_asm_do_resume) ENTRY(svm_asm_do_resume) svm_test_all_events: GET_CURRENT(%rbx) + movq %rbx, %rdi + call svm_do_resume /*test_all_events:*/ cli # tests must not race interrupts /*test_softirqs:*/ diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/vioapic.c --- a/xen/arch/x86/hvm/vioapic.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/vioapic.c Sun Aug 13 12:00:38 2006 -0400 @@ -79,7 +79,7 @@ static unsigned long hvm_vioapic_read_in switch (s->ioregsel) { case IOAPIC_REG_VERSION: result = ((((IOAPIC_NUM_PINS-1) & 0xff) << 16) - | (IOAPIC_VERSION_ID & 0x0f)); + | (IOAPIC_VERSION_ID & 0xff)); break; #ifndef __ia64__ @@ -89,7 +89,7 @@ static unsigned long hvm_vioapic_read_in case IOAPIC_REG_ARB_ID: /* XXX how arb_id used on p4? 
*/ - result = ((s->id & 0xf) << 24); + result = ((s->arb_id & 0xf) << 24); break; #endif @@ -107,7 +107,7 @@ static unsigned long hvm_vioapic_read_in (redir_content >> 32) & 0xffffffff : redir_content & 0xffffffff; } else { - printk("upic_mem_readl:undefined ioregsel %x\n", + printk("apic_mem_readl:undefined ioregsel %x\n", s->ioregsel); domain_crash_synchronous(); } @@ -244,7 +244,7 @@ static int hvm_vioapic_range(struct vcpu if ((s->flags & IOAPIC_ENABLE_FLAG) && (addr >= s->base_address && - (addr <= s->base_address + IOAPIC_MEM_LENGTH))) + (addr < s->base_address + IOAPIC_MEM_LENGTH))) return 1; else return 0; @@ -427,7 +427,7 @@ static void ioapic_deliver(hvm_vioapic_t else HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "null round robin mask %x vector %x delivery_mode %x\n", - deliver_bitmask, vector, deliver_bitmask); + deliver_bitmask, vector, dest_LowestPrio); break; } @@ -568,7 +568,7 @@ static int get_redir_num(hvm_vioapic_t * ASSERT(s); - for(i = 0; i < IOAPIC_NUM_PINS - 1; i++) { + for(i = 0; i < IOAPIC_NUM_PINS; i++) { if (s->redirtbl[i].RedirForm.vector == vector) return i; } diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/vlapic.c Sun Aug 13 12:00:38 2006 -0400 @@ -68,7 +68,7 @@ int vlapic_find_highest_irr(struct vlapi result = find_highest_bit((unsigned long *)(vlapic->regs + APIC_IRR), MAX_VECTOR); - ASSERT( result == -1 || result > 16); + ASSERT( result == -1 || result >= 16); return result; } @@ -91,7 +91,7 @@ int vlapic_find_highest_isr(struct vlapi result = find_highest_bit((unsigned long *)(vlapic->regs + APIC_ISR), MAX_VECTOR); - ASSERT( result == -1 || result > 16); + ASSERT( result == -1 || result >= 16); return result; } @@ -156,10 +156,11 @@ static int vlapic_match_dest(struct vcpu } else /* Logical */ { - uint32_t ldr = vlapic_get_reg(target, APIC_LDR); - + uint32_t ldr; if ( target == NULL ) break; + ldr = vlapic_get_reg(target, APIC_LDR); + /* Flat mode */ if ( vlapic_get_reg(target, APIC_DFR) == APIC_DFR_FLAT) { @@ -219,20 +220,20 @@ static int vlapic_accept_irq(struct vcpu if ( unlikely(vlapic == NULL || !vlapic_enabled(vlapic)) ) break; - if ( test_and_set_bit(vector, vlapic->regs + APIC_IRR) ) + if ( test_and_set_bit(vector, vlapic->regs + APIC_IRR) && trig_mode) { HVM_DBG_LOG(DBG_LEVEL_VLAPIC, - "level trig mode repeatedly for vector %d\n", vector); + "level trig mode repeatedly for vector %d\n", vector); break; } - if ( level ) + if ( trig_mode ) { HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "level trig mode for vector %d\n", vector); set_bit(vector, vlapic->regs + APIC_TMR); } - evtchn_set_pending(v, iopacket_port(v)); + hvm_prod_vcpu(v); result = 1; break; @@ -248,7 +249,7 @@ static int vlapic_accept_irq(struct vcpu break; case APIC_DM_INIT: - if ( level && !(trig_mode & APIC_INT_ASSERT) ) //Deassert + if ( trig_mode && !(level & APIC_INT_ASSERT) ) //Deassert printk("This hvm_vlapic is for P4, no work for De-assert init\n"); else { @@ -290,11 +291,12 @@ static int vlapic_accept_irq(struct vcpu return result; } + /* - This function is used by both ioapic and local APIC - The bitmap is for vcpu_id + * This function is used by both ioapic and local APIC + * The bitmap is for vcpu_id */ -struct vlapic* apic_round_robin(struct domain *d, +struct vlapic *apic_round_robin(struct domain *d, uint8_t dest_mode, uint8_t vector, uint32_t bitmap) @@ -321,11 +323,11 @@ struct vlapic* apic_round_robin(struct d /* the vcpu array is arranged according to vcpu_id */ do { - next++; - if ( 
!d->vcpu[next] || - !test_bit(_VCPUF_initialised, &d->vcpu[next]->vcpu_flags) || - next == MAX_VIRT_CPUS ) + if ( ++next == MAX_VIRT_CPUS ) next = 0; + if ( d->vcpu[next] == NULL || + !test_bit(_VCPUF_initialised, &d->vcpu[next]->vcpu_flags) ) + continue; if ( test_bit(next, &bitmap) ) { @@ -384,15 +386,15 @@ static void vlapic_ipi(struct vlapic *vl unsigned int dest = GET_APIC_DEST_FIELD(icr_high); unsigned int short_hand = icr_low & APIC_SHORT_MASK; - unsigned int trig_mode = icr_low & APIC_INT_ASSERT; - unsigned int level = icr_low & APIC_INT_LEVELTRIG; + unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG; + unsigned int level = icr_low & APIC_INT_ASSERT; unsigned int dest_mode = icr_low & APIC_DEST_MASK; unsigned int delivery_mode = icr_low & APIC_MODE_MASK; unsigned int vector = icr_low & APIC_VECTOR_MASK; struct vlapic *target; struct vcpu *v = NULL; - uint32_t lpr_map; + uint32_t lpr_map=0; HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "icr_high 0x%x, icr_low 0x%x, " "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " @@ -456,7 +458,7 @@ static uint32_t vlapic_get_tmcct(struct { do { tmcct += vlapic_get_reg(vlapic, APIC_TMICT); - } while ( tmcct < 0 ); + } while ( tmcct <= 0 ); } } @@ -489,6 +491,11 @@ static void vlapic_read_aligned(struct v *result = vlapic_get_tmcct(vlapic); break; + case APIC_ESR: + vlapic->err_write_count = 0; + *result = vlapic_get_reg(vlapic, offset); + break; + default: *result = vlapic_get_reg(vlapic, offset); break; @@ -522,10 +529,12 @@ static unsigned long vlapic_read(struct break; case 2: + ASSERT( alignment != 3 ); result = *(unsigned short *)((unsigned char *)&tmp + alignment); break; case 4: + ASSERT( alignment == 0 ); result = *(unsigned int *)((unsigned char *)&tmp + alignment); break; @@ -561,7 +570,7 @@ static void vlapic_write(struct vcpu *v, unsigned int tmp; unsigned char alignment; - /* Some kernel do will access with byte/word alignment*/ + /* Some kernels do will access with byte/word alignment */ printk("Notice: Local APIC write with len = %lx\n",len); alignment = offset & 0x3; tmp = vlapic_read(v, offset & ~0x3, 4); @@ -570,8 +579,8 @@ static void vlapic_write(struct vcpu *v, /* XXX the saddr is a tmp variable from caller, so should be ok But we should still change the following ref to val to local variable later */ - val = (tmp & ~(0xff << alignment)) | - ((val & 0xff) << alignment); + val = (tmp & ~(0xff << (8*alignment))) | + ((val & 0xff) << (8*alignment)); break; case 2: @@ -581,8 +590,8 @@ static void vlapic_write(struct vcpu *v, domain_crash_synchronous(); } - val = (tmp & ~(0xffff << alignment)) | - ((val & 0xffff) << alignment); + val = (tmp & ~(0xffff << (8*alignment))) | + ((val & 0xffff) << (8*alignment)); break; case 3: @@ -619,11 +628,11 @@ static void vlapic_write(struct vcpu *v, break; case APIC_DFR: - vlapic_set_reg(vlapic, APIC_DFR, val); + vlapic_set_reg(vlapic, APIC_DFR, val | 0x0FFFFFFF); break; case APIC_SPIV: - vlapic_set_reg(vlapic, APIC_SPIV, val & 0x1ff); + vlapic_set_reg(vlapic, APIC_SPIV, val & 0x3ff); if ( !( val & APIC_SPIV_APIC_ENABLED) ) { @@ -634,7 +643,7 @@ static void vlapic_write(struct vcpu *v, for ( i = 0; i < VLAPIC_LVT_NUM; i++ ) { - lvt_val = vlapic_get_reg(vlapic, APIC_LVT1 + 0x10 * i); + lvt_val = vlapic_get_reg(vlapic, APIC_LVTT + 0x10 * i); vlapic_set_reg(vlapic, APIC_LVTT + 0x10 * i, lvt_val | APIC_LVT_MASKED); } @@ -753,7 +762,7 @@ static int vlapic_range(struct vcpu *v, if ( vlapic_global_enabled(vlapic) && (addr >= vlapic->base_address) && - (addr <= vlapic->base_address + VLOCAL_APIC_MEM_LENGTH) 
) + (addr < vlapic->base_address + VLOCAL_APIC_MEM_LENGTH) ) return 1; return 0; @@ -940,7 +949,7 @@ void vlapic_post_injection(struct vcpu * case APIC_DM_NMI: case APIC_DM_INIT: case APIC_DM_STARTUP: - vlapic->direct_intr.deliver_mode &= deliver_mode; + vlapic->direct_intr.deliver_mode &= (1 << (deliver_mode >> 8)); break; default: diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/vmx/io.c Sun Aug 13 12:00:38 2006 -0400 @@ -142,6 +142,7 @@ asmlinkage void vmx_intr_assist(void) struct hvm_domain *plat=&v->domain->arch.hvm_domain; struct periodic_time *pt = &plat->pl_time.periodic_tm; struct hvm_virpic *pic= &plat->vpic; + int callback_irq; unsigned int idtv_info_field; unsigned long inst_len; int has_ext_irq; @@ -152,6 +153,15 @@ asmlinkage void vmx_intr_assist(void) if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) { pic_set_irq(pic, pt->irq, 0); pic_set_irq(pic, pt->irq, 1); + } + + callback_irq = v->domain->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ]; + if ( callback_irq != 0 && + local_events_need_delivery() ) { + /*inject para-device call back irq*/ + v->vcpu_info->evtchn_upcall_mask = 1; + pic_set_irq(pic, callback_irq, 0); + pic_set_irq(pic, callback_irq, 1); } has_ext_irq = cpu_has_pending_irq(v); @@ -221,7 +231,7 @@ asmlinkage void vmx_intr_assist(void) void vmx_do_resume(struct vcpu *v) { - struct domain *d = v->domain; + ioreq_t *p; struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm; vmx_stts(); @@ -235,12 +245,16 @@ void vmx_do_resume(struct vcpu *v) pickup_deactive_ticks(pt); } - if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) || - test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) ) - hvm_wait_io(); - - /* We can't resume the guest if we're waiting on I/O */ - ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)); + p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq; + wait_on_xen_event_channel(v->arch.hvm.xen_port, + p->state != STATE_IOREQ_READY && + p->state != STATE_IOREQ_INPROCESS); + if ( p->state == STATE_IORESP_READY ) + hvm_io_assist(v); + if ( p->state != STATE_INVALID ) { + printf("Weird HVM iorequest state %d.\n", p->state); + domain_crash(v->domain); + } } /* diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Sun Aug 13 12:00:38 2006 -0400 @@ -245,18 +245,6 @@ static void vmx_do_launch(struct vcpu *v if (v->vcpu_id == 0) hvm_setup_platform(v->domain); - if ( evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0 ) - { - printk("VMX domain bind port %d to vcpu %d failed!\n", - iopacket_port(v), v->vcpu_id); - domain_crash_synchronous(); - } - - HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x", iopacket_port(v)); - - clear_bit(iopacket_port(v), - &v->domain->shared_info->evtchn_mask[0]); - __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (cr0) : ); error |= __vmwrite(GUEST_CR0, cr0); diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/vmx/vmx.c Sun Aug 13 12:00:38 2006 -0400 @@ -25,6 +25,7 @@ #include <xen/irq.h> #include <xen/softirq.h> #include <xen/domain_page.h> +#include <xen/hypercall.h> #include <asm/current.h> #include <asm/io.h> #include <asm/shadow.h> @@ -48,8 +49,8 @@ #include <asm/hvm/vpic.h> #include <asm/hvm/vlapic.h> -static unsigned long trace_values[NR_CPUS][5]; -#define TRACE_VMEXIT(index,value) 
trace_values[smp_processor_id()][index]=value +static DEFINE_PER_CPU(unsigned long, trace_values[5]); +#define TRACE_VMEXIT(index,value) this_cpu(trace_values)[index]=value static void vmx_ctxt_switch_from(struct vcpu *v); static void vmx_ctxt_switch_to(struct vcpu *v); @@ -141,6 +142,7 @@ static void vmx_relinquish_guest_resourc free_domheap_page(VLAPIC(v)->regs_page); xfree(VLAPIC(v)); } + hvm_release_assist_channel(v); } kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer); @@ -149,12 +151,15 @@ static void vmx_relinquish_guest_resourc unmap_domain_page_global( (void *)d->arch.hvm_domain.shared_page_va); + if ( d->arch.hvm_domain.buffered_io_va ) + unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va); + shadow_direct_map_clean(d); } #ifdef __x86_64__ -static struct vmx_msr_state percpu_msr[NR_CPUS]; +static DEFINE_PER_CPU(struct vmx_msr_state, percpu_msr); static u32 msr_data_index[VMX_MSR_COUNT] = { @@ -175,7 +180,7 @@ static void vmx_save_segments(struct vcp */ static void vmx_load_msrs(void) { - struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()]; + struct vmx_msr_state *host_state = &this_cpu(percpu_msr); int i; while ( host_state->flags ) @@ -188,7 +193,7 @@ static void vmx_load_msrs(void) static void vmx_save_init_msrs(void) { - struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()]; + struct vmx_msr_state *host_state = &this_cpu(percpu_msr); int i; for ( i = 0; i < VMX_MSR_COUNT; i++ ) @@ -277,7 +282,7 @@ static inline int long_mode_do_msr_write u64 msr_content = regs->eax | ((u64)regs->edx << 32); struct vcpu *v = current; struct vmx_msr_state *msr = &v->arch.hvm_vmx.msr_content; - struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()]; + struct vmx_msr_state *host_state = &this_cpu(percpu_msr); HVM_DBG_LOG(DBG_LEVEL_1, "msr 0x%lx msr_content 0x%"PRIx64"\n", (unsigned long)regs->ecx, msr_content); @@ -359,7 +364,7 @@ static void vmx_restore_msrs(struct vcpu unsigned long guest_flags ; guest_state = &v->arch.hvm_vmx.msr_content;; - host_state = &percpu_msr[smp_processor_id()]; + host_state = &this_cpu(percpu_msr); wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs); guest_flags = guest_state->flags; @@ -671,28 +676,6 @@ static int check_vmx_controls(u32 ctrls, return 1; } -/* Setup HVM interfaces */ -static void vmx_setup_hvm_funcs(void) -{ - if ( hvm_enabled ) - return; - - hvm_funcs.disable = stop_vmx; - - hvm_funcs.initialize_guest_resources = vmx_initialize_guest_resources; - hvm_funcs.relinquish_guest_resources = vmx_relinquish_guest_resources; - - hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs; - hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs; - - hvm_funcs.realmode = vmx_realmode; - hvm_funcs.paging_enabled = vmx_paging_enabled; - hvm_funcs.instruction_length = vmx_instruction_length; - hvm_funcs.get_guest_ctrl_reg = vmx_get_ctrl_reg; - - hvm_funcs.init_ap_context = vmx_init_ap_context; -} - static void vmx_init_hypercall_page(struct domain *d, void *hypercall_page) { char *p; @@ -713,6 +696,30 @@ static void vmx_init_hypercall_page(stru /* Don't support HYPERVISOR_iret at the moment */ *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */ +} + +/* Setup HVM interfaces */ +static void vmx_setup_hvm_funcs(void) +{ + if ( hvm_enabled ) + return; + + hvm_funcs.disable = stop_vmx; + + hvm_funcs.initialize_guest_resources = vmx_initialize_guest_resources; + hvm_funcs.relinquish_guest_resources = vmx_relinquish_guest_resources; + + hvm_funcs.store_cpu_guest_regs = 
vmx_store_cpu_guest_regs; + hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs; + + hvm_funcs.realmode = vmx_realmode; + hvm_funcs.paging_enabled = vmx_paging_enabled; + hvm_funcs.instruction_length = vmx_instruction_length; + hvm_funcs.get_guest_ctrl_reg = vmx_get_ctrl_reg; + + hvm_funcs.init_ap_context = vmx_init_ap_context; + + hvm_funcs.init_hypercall_page = vmx_init_hypercall_page; } int start_vmx(void) @@ -780,8 +787,6 @@ int start_vmx(void) vmx_save_init_msrs(); vmx_setup_hvm_funcs(); - - hvm_funcs.init_hypercall_page = vmx_init_hypercall_page; hvm_enabled = 1; @@ -2014,7 +2019,7 @@ void vmx_vmexit_do_hlt(void) next_wakeup = next_pit; if ( next_wakeup != - 1 ) set_timer(¤t->arch.hvm_vmx.hlt_timer, next_wakeup); - hvm_safe_block(); + do_sched_op_compat(SCHEDOP_block, 0); } static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs) @@ -2128,12 +2133,10 @@ asmlinkage void vmx_vmexit_handler(struc asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs) { unsigned int exit_reason; - unsigned long exit_qualification, eip, inst_len = 0; + unsigned long exit_qualification, rip, inst_len = 0; struct vcpu *v = current; - int error; - - error = __vmread(VM_EXIT_REASON, &exit_reason); - BUG_ON(error); + + __vmread(VM_EXIT_REASON, &exit_reason); perfc_incra(vmexits, exit_reason); @@ -2172,11 +2175,9 @@ asmlinkage void vmx_vmexit_handler(struc domain_crash_synchronous(); } - __vmread(GUEST_RIP, &eip); TRACE_VMEXIT(0,exit_reason); - switch ( exit_reason ) - { + switch ( exit_reason ) { case EXIT_REASON_EXCEPTION_NMI: { /* @@ -2187,15 +2188,15 @@ asmlinkage void vmx_vmexit_handler(struc unsigned int vector; unsigned long va; - if (__vmread(VM_EXIT_INTR_INFO, &vector) - || !(vector & INTR_INFO_VALID_MASK)) - __hvm_bug(®s); + if ( __vmread(VM_EXIT_INTR_INFO, &vector) || + !(vector & INTR_INFO_VALID_MASK) ) + domain_crash_synchronous(); vector &= INTR_INFO_VECTOR_MASK; TRACE_VMEXIT(1,vector); perfc_incra(cause_vector, vector); - switch (vector) { + switch ( vector ) { #ifdef XEN_DEBUGGER case TRAP_debug: { @@ -2236,7 +2237,7 @@ asmlinkage void vmx_vmexit_handler(struc { if ( test_bit(_DOMF_debugging, &v->domain->domain_flags) ) domain_pause_for_debugger(); - else + else vmx_reflect_exception(v); break; } @@ -2260,7 +2261,7 @@ asmlinkage void vmx_vmexit_handler(struc (unsigned long)regs.ecx, (unsigned long)regs.edx, (unsigned long)regs.esi, (unsigned long)regs.edi); - if (!vmx_do_page_fault(va, ®s)) { + if ( !vmx_do_page_fault(va, ®s) ) { /* * Inject #PG using Interruption-Information Fields */ @@ -2281,6 +2282,9 @@ asmlinkage void vmx_vmexit_handler(struc } case EXIT_REASON_EXTERNAL_INTERRUPT: vmx_vmexit_do_extint(®s); + break; + case EXIT_REASON_TRIPLE_FAULT: + domain_crash_synchronous(); break; case EXIT_REASON_PENDING_INTERRUPT: /* @@ -2296,7 +2300,7 @@ asmlinkage void vmx_vmexit_handler(struc v->arch.hvm_vcpu.u.vmx.exec_control); break; case EXIT_REASON_TASK_SWITCH: - __hvm_bug(®s); + domain_crash_synchronous(); break; case EXIT_REASON_CPUID: vmx_vmexit_do_cpuid(®s); @@ -2321,7 +2325,7 @@ asmlinkage void vmx_vmexit_handler(struc case EXIT_REASON_VMCALL: { __get_instruction_length(inst_len); - __vmread(GUEST_RIP, &eip); + __vmread(GUEST_RIP, &rip); __vmread(EXIT_QUALIFICATION, &exit_qualification); hvm_do_hypercall(®s); @@ -2330,13 +2334,13 @@ asmlinkage void vmx_vmexit_handler(struc } case EXIT_REASON_CR_ACCESS: { - __vmread(GUEST_RIP, &eip); + __vmread(GUEST_RIP, &rip); __get_instruction_length(inst_len); __vmread(EXIT_QUALIFICATION, &exit_qualification); - 
HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx", - eip, inst_len, exit_qualification); - if (vmx_cr_access(exit_qualification, ®s)) + HVM_DBG_LOG(DBG_LEVEL_1, "rip = %lx, inst_len =%lx, exit_qualification = %lx", + rip, inst_len, exit_qualification); + if ( vmx_cr_access(exit_qualification, ®s) ) __update_guest_eip(inst_len); TRACE_VMEXIT(3,regs.error_code); TRACE_VMEXIT(4,exit_qualification); @@ -2360,13 +2364,14 @@ asmlinkage void vmx_vmexit_handler(struc __update_guest_eip(inst_len); break; case EXIT_REASON_MSR_WRITE: - __vmread(GUEST_RIP, &eip); vmx_do_msr_write(®s); __get_instruction_length(inst_len); __update_guest_eip(inst_len); break; case EXIT_REASON_MWAIT_INSTRUCTION: - __hvm_bug(®s); + case EXIT_REASON_MONITOR_INSTRUCTION: + case EXIT_REASON_PAUSE_INSTRUCTION: + domain_crash_synchronous(); break; case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: @@ -2375,15 +2380,15 @@ asmlinkage void vmx_vmexit_handler(struc case EXIT_REASON_VMREAD: case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: - case EXIT_REASON_VMOFF: - case EXIT_REASON_VMON: - /* Report invalid opcode exception when a VMX guest tries to execute + case EXIT_REASON_VMXOFF: + case EXIT_REASON_VMXON: + /* Report invalid opcode exception when a VMX guest tries to execute any of the VMX instructions */ vmx_inject_hw_exception(v, TRAP_invalid_op, VMX_DELIVER_NO_ERROR_CODE); break; default: - __hvm_bug(®s); /* should not happen */ + domain_crash_synchronous(); /* should not happen */ } } @@ -2398,11 +2403,11 @@ asmlinkage void vmx_trace_vmentry (void) asmlinkage void vmx_trace_vmentry (void) { TRACE_5D(TRC_VMX_VMENTRY, - trace_values[smp_processor_id()][0], - trace_values[smp_processor_id()][1], - trace_values[smp_processor_id()][2], - trace_values[smp_processor_id()][3], - trace_values[smp_processor_id()][4]); + this_cpu(trace_values)[0], + this_cpu(trace_values)[1], + this_cpu(trace_values)[2], + this_cpu(trace_values)[3], + this_cpu(trace_values)[4]); TRACE_VMEXIT(0,9); TRACE_VMEXIT(1,9); TRACE_VMEXIT(2,9); diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/vmx/x86_32/exits.S --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S Sun Aug 13 12:00:38 2006 -0400 @@ -94,6 +94,9 @@ vmx_process_softirqs: ALIGN ENTRY(vmx_asm_do_vmentry) GET_CURRENT(%ebx) + pushl %ebx + call vmx_do_resume + addl $4, %esp cli # tests must not race interrupts movl VCPU_processor(%ebx),%eax diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Sun Aug 13 12:00:38 2006 -0400 @@ -105,6 +105,8 @@ vmx_process_softirqs: ALIGN ENTRY(vmx_asm_do_vmentry) GET_CURRENT(%rbx) + movq %rbx, %rdi + call vmx_do_resume cli # tests must not race interrupts movl VCPU_processor(%rbx),%eax diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/irq.c Sun Aug 13 12:00:38 2006 -0400 @@ -160,11 +160,12 @@ typedef struct { * Stack of interrupts awaiting EOI on each CPU. These must be popped in * order, as only the current highest-priority pending irq can be EOIed. */ -static struct { +struct pending_eoi { u8 vector; /* Vector awaiting EOI */ u8 ready; /* Ready for EOI now? 
*/ -} pending_eoi[NR_CPUS][NR_VECTORS] __cacheline_aligned; -#define pending_eoi_sp(cpu) (pending_eoi[cpu][NR_VECTORS-1].vector) +}; +static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_VECTORS]); +#define pending_eoi_sp(p) ((p)[NR_VECTORS-1].vector) static void __do_IRQ_guest(int vector) { @@ -172,7 +173,8 @@ static void __do_IRQ_guest(int vector) irq_desc_t *desc = &irq_desc[vector]; irq_guest_action_t *action = (irq_guest_action_t *)desc->action; struct domain *d; - int i, sp, cpu = smp_processor_id(); + int i, sp; + struct pending_eoi *peoi = this_cpu(pending_eoi); if ( unlikely(action->nr_guests == 0) ) { @@ -185,13 +187,13 @@ static void __do_IRQ_guest(int vector) if ( action->ack_type == ACKTYPE_EOI ) { - sp = pending_eoi_sp(cpu); - ASSERT((sp == 0) || (pending_eoi[cpu][sp-1].vector < vector)); + sp = pending_eoi_sp(peoi); + ASSERT((sp == 0) || (peoi[sp-1].vector < vector)); ASSERT(sp < (NR_VECTORS-1)); - pending_eoi[cpu][sp].vector = vector; - pending_eoi[cpu][sp].ready = 0; - pending_eoi_sp(cpu) = sp+1; - cpu_set(cpu, action->cpu_eoi_map); + peoi[sp].vector = vector; + peoi[sp].ready = 0; + pending_eoi_sp(peoi) = sp+1; + cpu_set(smp_processor_id(), action->cpu_eoi_map); } for ( i = 0; i < action->nr_guests; i++ ) @@ -207,43 +209,45 @@ static void __do_IRQ_guest(int vector) /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */ static void flush_ready_eoi(void *unused) { - irq_desc_t *desc; - int vector, sp, cpu = smp_processor_id(); + struct pending_eoi *peoi = this_cpu(pending_eoi); + irq_desc_t *desc; + int vector, sp; ASSERT(!local_irq_is_enabled()); - sp = pending_eoi_sp(cpu); - - while ( (--sp >= 0) && pending_eoi[cpu][sp].ready ) - { - vector = pending_eoi[cpu][sp].vector; + sp = pending_eoi_sp(peoi); + + while ( (--sp >= 0) && peoi[sp].ready ) + { + vector = peoi[sp].vector; desc = &irq_desc[vector]; spin_lock(&desc->lock); desc->handler->end(vector); spin_unlock(&desc->lock); } - pending_eoi_sp(cpu) = sp+1; + pending_eoi_sp(peoi) = sp+1; } static void __set_eoi_ready(irq_desc_t *desc) { irq_guest_action_t *action = (irq_guest_action_t *)desc->action; - int vector, sp, cpu = smp_processor_id(); + struct pending_eoi *peoi = this_cpu(pending_eoi); + int vector, sp; vector = desc - irq_desc; if ( !(desc->status & IRQ_GUEST) || (action->in_flight != 0) || - !cpu_test_and_clear(cpu, action->cpu_eoi_map) ) + !cpu_test_and_clear(smp_processor_id(), action->cpu_eoi_map) ) return; - sp = pending_eoi_sp(cpu); + sp = pending_eoi_sp(peoi); do { ASSERT(sp > 0); - } while ( pending_eoi[cpu][--sp].vector != vector ); - ASSERT(!pending_eoi[cpu][sp].ready); - pending_eoi[cpu][sp].ready = 1; + } while ( peoi[--sp].vector != vector ); + ASSERT(!peoi[sp].ready); + peoi[sp].ready = 1; } /* Mark specified IRQ as ready-for-EOI (if it really is) and attempt to EOI. 
*/ @@ -269,16 +273,17 @@ static void flush_all_pending_eoi(void * { irq_desc_t *desc; irq_guest_action_t *action; - int i, vector, sp, cpu = smp_processor_id(); + struct pending_eoi *peoi = this_cpu(pending_eoi); + int i, vector, sp; ASSERT(!local_irq_is_enabled()); - sp = pending_eoi_sp(cpu); + sp = pending_eoi_sp(peoi); while ( --sp >= 0 ) { - if ( pending_eoi[cpu][sp].ready ) + if ( peoi[sp].ready ) continue; - vector = pending_eoi[cpu][sp].vector; + vector = peoi[sp].vector; desc = &irq_desc[vector]; spin_lock(&desc->lock); action = (irq_guest_action_t *)desc->action; @@ -668,7 +673,7 @@ static int __init setup_dump_irqs(void) } __initcall(setup_dump_irqs); -static struct timer end_irq_timer[NR_CPUS]; +static DEFINE_PER_CPU(struct timer, end_irq_timer); /* * force_intack: Forcibly emit all pending EOIs on each CPU every second. @@ -677,22 +682,13 @@ static struct timer end_irq_timer[NR_CPU static void end_irq_timeout(void *unused) { - int cpu = smp_processor_id(); - local_irq_disable(); flush_all_pending_eoi(NULL); local_irq_enable(); on_selected_cpus(cpu_online_map, flush_ready_eoi, NULL, 1, 0); - set_timer(&end_irq_timer[cpu], NOW() + MILLISECS(1000)); -} - -static void __init __setup_irq_timeout(void *unused) -{ - int cpu = smp_processor_id(); - init_timer(&end_irq_timer[cpu], end_irq_timeout, NULL, cpu); - set_timer(&end_irq_timer[cpu], NOW() + MILLISECS(1000)); + set_timer(&this_cpu(end_irq_timer), NOW() + MILLISECS(1000)); } static int force_intack; @@ -700,8 +696,17 @@ boolean_param("force_intack", force_inta static int __init setup_irq_timeout(void) { - if ( force_intack ) - on_each_cpu(__setup_irq_timeout, NULL, 1, 1); + unsigned int cpu; + + if ( !force_intack ) + return 0; + + for_each_online_cpu ( cpu ) + { + init_timer(&per_cpu(end_irq_timer, cpu), end_irq_timeout, NULL, cpu); + set_timer(&per_cpu(end_irq_timer, cpu), NOW() + MILLISECS(1000)); + } + return 0; } __initcall(setup_irq_timeout); diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Fri Aug 11 13:30:48 2006 -0400 +++ b/xen/arch/x86/mm.c Sun Aug 13 12:00:38 2006 -0400 @@ -139,20 +139,21 @@ static int mod_l1_entry(l1_pgentry_t *, static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t); /* Used to defer flushing of memory structures. */ -static struct { +struct percpu_mm_info { #define DOP_FLUSH_TLB (1<<0) /* Flush the local TLB. */ #define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */ #define DOP_RELOAD_LDT (1<<2) /* Reload the LDT shadow mapping. */ unsigned int deferred_ops; /* If non-NULL, specifies a foreign subject domain for some operations. */ struct domain *foreign; -} __cacheline_aligned percpu_info[NR_CPUS]; +}; +static DEFINE_PER_CPU(struct percpu_mm_info, percpu_mm_info); /* * Returns the current foreign domain; defaults to the currently-executing * domain if a foreign override hasn't been specified. */ -#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ?: current->domain) +#define FOREIGNDOM (this_cpu(percpu_mm_info).foreign ?: current->domain) /* Private domain structs for DOMID_XEN and DOMID_IO. */ static struct domain *dom_xen, *dom_io; @@ -189,8 +190,6 @@ void arch_init_memory(void) extern void subarch_init_memory(void); unsigned long i, pfn, rstart_pfn, rend_pfn; - - memset(percpu_info, 0, sizeof(percpu_info)); /* * Initialise our DOMID_XEN domain. @@ -378,7 +377,8 @@ void invalidate_shadow_ldt(struct vcpu * } /* Dispose of the (now possibly invalid) mappings from the TLB. 
*/ - percpu_info[v->processor].deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT; + ASSERT(v->processor == smp_processor_id()); + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT; } @@ -1503,7 +1503,7 @@ void free_page_type(struct page_info *pa * (e.g., update_va_mapping()) or we could end up modifying a page * that is no longer a page table (and hence screw up ref counts). */ - percpu_info[smp_processor_id()].deferred_ops |= DOP_FLUSH_ALL_TLBS; + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS; if ( unlikely(shadow_mode_enabled(owner)) ) { @@ -1669,10 +1669,8 @@ int get_page_type(struct page_info *page * enter a recursive loop via get_page_from_l1e() * during pagetable revalidation. */ - LOCK_BIGLOCK(current->domain); - cleanup_writable_pagetable(current->domain); + sync_pagetable_state(current->domain); y = page->u.inuse.type_info; - UNLOCK_BIGLOCK(current->domain); /* Can we make progress now? */ if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) || ((y & PGT_count_mask) == 0) ) @@ -1750,8 +1748,6 @@ int new_guest_cr3(unsigned long mfn) int okay; unsigned long old_base_mfn; - ASSERT(writable_pagetable_in_sync(d)); - if ( shadow_mode_refcounts(d) ) { okay = get_page_from_pagenr(mfn, d); @@ -1781,7 +1777,8 @@ int new_guest_cr3(unsigned long mfn) /* Failure here is unrecoverable: the VCPU has no pagetable! */ MEM_LOG("Fatal error while installing new baseptr %lx", mfn); domain_crash(d); - percpu_info[v->processor].deferred_ops = 0; + ASSERT(v->processor == smp_processor_id()); + this_cpu(percpu_mm_info).deferred_ops = 0; return 0; } } @@ -1817,13 +1814,14 @@ int new_guest_cr3(unsigned long mfn) return 1; } -static void process_deferred_ops(unsigned int cpu) +static void process_deferred_ops(void) { unsigned int deferred_ops; struct domain *d = current->domain; - - deferred_ops = percpu_info[cpu].deferred_ops; - percpu_info[cpu].deferred_ops = 0; + struct percpu_mm_info *info = &this_cpu(percpu_mm_info); + + deferred_ops = info->deferred_ops; + info->deferred_ops = 0; if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) ) { @@ -1838,19 +1836,20 @@ static void process_deferred_ops(unsigne if ( deferred_ops & DOP_RELOAD_LDT ) (void)map_ldt_shadow_page(0); - if ( unlikely(percpu_info[cpu].foreign != NULL) ) - { - put_domain(percpu_info[cpu].foreign); - percpu_info[cpu].foreign = NULL; - } -} - -static int set_foreigndom(unsigned int cpu, domid_t domid) + if ( unlikely(info->foreign != NULL) ) + { + put_domain(info->foreign); + info->foreign = NULL; + } +} + +static int set_foreigndom(domid_t domid) { struct domain *e, *d = current->domain; + struct percpu_mm_info *info = &this_cpu(percpu_mm_info); int okay = 1; - ASSERT(percpu_info[cpu].foreign == NULL); + ASSERT(info->foreign == NULL); if ( likely(domid == DOMID_SELF) ) goto out; @@ -1867,7 +1866,7 @@ static int set_foreigndom(unsigned int c { case DOMID_IO: get_knownalive_domain(dom_io); - percpu_info[cpu].foreign = dom_io; + info->foreign = dom_io; break; default: MEM_LOG("Dom %u cannot set foreign dom", d->domain_id); @@ -1877,18 +1876,18 @@ static int set_foreigndom(unsigned int c } else { - percpu_info[cpu].foreign = e = find_domain_by_id(domid); + info->foreign = e = find_domain_by_id(domid); if ( e == NULL ) { switch ( domid ) { case DOMID_XEN: get_knownalive_domain(dom_xen); - percpu_info[cpu].foreign = dom_xen; + info->foreign = dom_xen; break; case DOMID_IO: get_knownalive_domain(dom_io); - percpu_info[cpu].foreign = dom_io; + info->foreign = dom_io; break; default: MEM_LOG("Unknown domain '%u'", 
domid); @@ -1928,7 +1927,7 @@ int do_mmuext_op( unsigned int foreigndom) { struct mmuext_op op; - int rc = 0, i = 0, okay, cpu = smp_processor_id(); + int rc = 0, i = 0, okay; unsigned long mfn, type; unsigned int done = 0; struct page_info *page; @@ -1937,7 +1936,7 @@ int do_mmuext_op( LOCK_BIGLOCK(d); - cleanup_writable_pagetable(d); + sync_pagetable_state(d); if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { @@ -1946,7 +1945,7 @@ int do_mmuext_op( (void)copy_from_guest(&done, pdone, 1); } - if ( !set_foreigndom(cpu, foreigndom) ) + if ( !set_foreigndom(foreigndom) ) { rc = -ESRCH; goto out; @@ -2042,7 +2041,7 @@ int do_mmuext_op( case MMUEXT_NEW_BASEPTR: mfn = gmfn_to_mfn(current->domain, mfn); okay = new_guest_cr3(mfn); - percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; + this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB; break; #ifdef __x86_64__ @@ -2065,7 +2064,7 @@ int do_mmuext_op( #endif case MMUEXT_TLB_FLUSH_LOCAL: - percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB; + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB; break; case MMUEXT_INVLPG_LOCAL: @@ -2137,9 +2136,9 @@ int do_mmuext_op( v->arch.guest_context.ldt_base = ptr; v->arch.guest_context.ldt_ents = ents; load_LDT(v); - percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT; + this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT; if ( ents != 0 ) - percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT; + this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT; } break; } @@ -2160,7 +2159,7 @@ int do_mmuext_op( } out: - process_deferred_ops(cpu); + process_deferred_ops(); /* Add incremental work we have done to the @done output parameter. */ done += i; @@ -2181,7 +2180,7 @@ int do_mmu_update( void *va; unsigned long gpfn, gmfn, mfn; struct page_info *page; - int rc = 0, okay = 1, i = 0, cpu = smp_processor_id(); + int rc = 0, okay = 1, i = 0; unsigned int cmd, done = 0; struct vcpu *v = current; struct domain *d = v->domain; @@ -2190,7 +2189,7 @@ int do_mmu_update( LOCK_BIGLOCK(d); - cleanup_writable_pagetable(d); + sync_pagetable_state(d); if ( unlikely(shadow_mode_enabled(d)) ) check_pagetable(v, "pre-mmu"); /* debug */ @@ -2205,7 +2204,7 @@ int do_mmu_update( domain_mmap_cache_init(&mapcache); domain_mmap_cache_init(&sh_mapcache); - if ( !set_foreigndom(cpu, foreigndom) ) + if ( !set_foreigndom(foreigndom) ) { rc = -ESRCH; goto out; @@ -2396,7 +2395,7 @@ int do_mmu_update( domain_mmap_cache_destroy(&mapcache); domain_mmap_cache_destroy(&sh_mapcache); - process_deferred_ops(cpu); + process_deferred_ops(); /* Add incremental work we have done to the @done output parameter. */ done += i; @@ -2690,7 +2689,6 @@ int do_update_va_mapping(unsigned long v l1_pgentry_t val = l1e_from_intpte(val64); struct vcpu *v = current; struct domain *d = v->domain; - unsigned int cpu = smp_processor_id(); unsigned long vmask, bmap_ptr; cpumask_t pmask; int rc = 0; @@ -2702,7 +2700,7 @@ int do_update_va_mapping(unsigned long v LOCK_BIGLOCK(d); - cleanup_writable_pagetable(d); + sync_pagetable_state(d); if ( unlikely(shadow_mode_enabled(d)) ) check_pagetable(v, "pre-va"); /* debug */ @@ -2713,9 +2711,10 @@ int do_update_va_mapping(unsigned long v if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) ) { - if ( unlikely(percpu_info[cpu].foreign && + if ( unlikely(this_cpu(percpu_mm_info).foreign && (shadow_mode_translate(d) || - shadow_mode_translate(percpu_info[cpu].foreign))) ) + shadow_mode_translate( + this_cpu(percpu_mm_info).foreign))) ) { /* * The foreign domain's pfn's are in a different namespace. 
There's @@ -2773,7 +2772,7 @@ int do_update_va_mapping(unsigned long v break; } - process_deferred_ops(cpu); + process_deferred_ops(); UNLOCK_BIGLOCK(d); @@ -2784,13 +2783,12 @@ int do_update_va_mapping_otherdomain(uns unsigned long flags, domid_t domid) { - unsigned int cpu = smp_processor_id(); int rc; if ( unlikely(!IS_PRIV(current->domain)) ) return -EPERM; - if ( !set_foreigndom(cpu, domid) ) + if ( !set_foreigndom(domid) ) return -ESRCH; rc = do_update_va_mapping(va, val64, flags); @@ -3100,131 +3098,6 @@ long arch_memory_op(int op, XEN_GUEST_HA * Writable Pagetables */ -#ifdef VVERBOSE -int ptwr_debug = 0x0; -#define PTWR_PRINTK(_f, _a...) \ - do { if ( unlikely(ptwr_debug) ) printk( _f , ## _a ); } while ( 0 ) -#define PTWR_PRINT_WHICH (which ? 'I' : 'A') -#else -#define PTWR_PRINTK(_f, _a...) ((void)0) -#endif - - -#ifdef PERF_ARRAYS - -/**************** writeable pagetables profiling functions *****************/ - -#define ptwr_eip_buckets 256 - -int ptwr_eip_stat_threshold[] = {1, 10, 50, 100, L1_PAGETABLE_ENTRIES}; - -#define ptwr_eip_stat_thresholdN (sizeof(ptwr_eip_stat_threshold)/sizeof(int)) - -struct { - unsigned long eip; - domid_t id; - u32 val[ptwr_eip_stat_thresholdN]; -} typedef ptwr_eip_stat_t; - -ptwr_eip_stat_t ptwr_eip_stats[ptwr_eip_buckets]; - -static inline unsigned int ptwr_eip_stat_hash( unsigned long eip, domid_t id ) -{ - return (((unsigned long) id) ^ eip ^ (eip>>8) ^ (eip>>16) ^ (eip>24)) % - ptwr_eip_buckets; -} - -static void ptwr_eip_stat_inc(u32 *n) -{ - unsigned int i, j; - - if ( ++(*n) != 0 ) - return; - - *n = ~0; - - /* Re-scale all buckets. */ - for ( i = 0; i < ptwr_eip_buckets; i++ ) - for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ ) - ptwr_eip_stats[i].val[j] >>= 1; -} - -static void ptwr_eip_stat_update(unsigned long eip, domid_t id, int modified) -{ - unsigned int i, j, b; - - i = b = ptwr_eip_stat_hash(eip, id); - - do - { - if ( !ptwr_eip_stats[i].eip ) - { - /* doesn't exist */ - ptwr_eip_stats[i].eip = eip; - ptwr_eip_stats[i].id = id; - memset(ptwr_eip_stats[i].val,0, sizeof(ptwr_eip_stats[i].val)); - } - - if ( ptwr_eip_stats[i].eip == eip && ptwr_eip_stats[i].id == id) - { - for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ ) - if ( modified <= ptwr_eip_stat_threshold[j] ) - break; - BUG_ON(j >= ptwr_eip_stat_thresholdN); - ptwr_eip_stat_inc(&ptwr_eip_stats[i].val[j]); - return; - } - - i = (i+1) % ptwr_eip_buckets; - } - while ( i != b ); - - printk("ptwr_eip_stat: too many EIPs in use!\n"); - - ptwr_eip_stat_print(); - ptwr_eip_stat_reset(); -} - -void ptwr_eip_stat_reset(void) -{ - memset(ptwr_eip_stats, 0, sizeof(ptwr_eip_stats)); -} - -void ptwr_eip_stat_print(void) -{ - struct domain *e; - domid_t d; - unsigned int i, j; - - for_each_domain( e ) - { - d = e->domain_id; - - for ( i = 0; i < ptwr_eip_buckets; i++ ) - { - if ( !ptwr_eip_stats[i].eip || ptwr_eip_stats[i].id != d ) - continue; - - printk("D %5d eip %p ", - ptwr_eip_stats[i].id, (void *)ptwr_eip_stats[i].eip); - - for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ ) - printk("<=%u %4u \t", - ptwr_eip_stat_threshold[j], - ptwr_eip_stats[i].val[j]); - printk("\n"); - } - } -} - -#else /* PERF_ARRAYS */ - -#define ptwr_eip_stat_update(eip, id, modified) ((void)0) - -#endif - -/*******************************************************************/ - /* Re-validate a given p.t. 
page, given its prior snapshot */
 int revalidate_l1(
     struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
@@ -3275,112 +3148,6 @@ int revalidate_l1(
     return modified;
 }
 
-
-/* Flush the given writable p.t. page and write-protect it again. */
-void ptwr_flush(struct domain *d, const int which)
-{
-    unsigned long l1va;
-    l1_pgentry_t *pl1e, pte, *ptep;
-    l2_pgentry_t *pl2e;
-    unsigned int modified;
-
-#ifdef CONFIG_X86_64
-    struct vcpu *v = current;
-    int user_mode = !(v->arch.flags & TF_kernel_mode);
-#endif
-
-    ASSERT(!shadow_mode_enabled(d));
-
-    if ( unlikely(d->arch.ptwr[which].vcpu != current) )
-        /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
-        __write_ptbase(pagetable_get_pfn(
-            d->arch.ptwr[which].vcpu->arch.guest_table));
-    else
-        TOGGLE_MODE();
-
-    l1va = d->arch.ptwr[which].l1va;
-    ptep = (l1_pgentry_t *)&linear_pg_table[l1_linear_offset(l1va)];
-
-    /*
-     * STEP 1. Write-protect the p.t. page so no more updates can occur.
-     */
-
-    if ( unlikely(__get_user(pte.l1, &ptep->l1)) )
-    {
-        MEM_LOG("ptwr: Could not read pte at %p", ptep);
-        /*
-         * Really a bug. We could read this PTE during the initial fault,
-         * and pagetables can't have changed meantime.
-         */
-        BUG();
-    }
-    PTWR_PRINTK("[%c] disconnected_l1va at %p is %"PRIpte"\n",
-                PTWR_PRINT_WHICH, ptep, l1e_get_intpte(pte));
-    l1e_remove_flags(pte, _PAGE_RW);
-
-    /* Write-protect the p.t. page in the guest page table. */
-    if ( unlikely(__put_user(pte, ptep)) )
-    {
-        MEM_LOG("ptwr: Could not update pte at %p", ptep);
-        /*
-         * Really a bug. We could write this PTE during the initial fault,
-         * and pagetables can't have changed meantime.
-         */
-        BUG();
-    }
-
-    /* Ensure that there are no stale writable mappings in any TLB. */
-    /* NB. INVLPG is a serialising instruction: flushes pending updates. */
-    flush_tlb_one_mask(d->domain_dirty_cpumask, l1va);
-    PTWR_PRINTK("[%c] disconnected_l1va at %p now %"PRIpte"\n",
-                PTWR_PRINT_WHICH, ptep, l1e_get_intpte(pte));
-
-    /*
-     * STEP 2. Validate any modified PTEs.
-     */
-
-    if ( likely(d == current->domain) )
-    {
-        pl1e = map_domain_page(l1e_get_pfn(pte));
-        modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
-        unmap_domain_page(pl1e);
-        perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
-        ptwr_eip_stat_update(d->arch.ptwr[which].eip, d->domain_id, modified);
-        d->arch.ptwr[which].prev_nr_updates = modified;
-    }
-    else
-    {
-        /*
-         * Must make a temporary global mapping, since we are running in the
-         * wrong address space, so no access to our own mapcache.
-         */
-        pl1e = map_domain_page_global(l1e_get_pfn(pte));
-        modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
-        unmap_domain_page_global(pl1e);
-    }
-
-    /*
-     * STEP 3. Reattach the L1 p.t. page into the current address space.
-     */
-
-    if ( which == PTWR_PT_ACTIVE )
-    {
-        pl2e = &__linear_l2_table[d->arch.ptwr[which].l2_idx];
-        l2e_add_flags(*pl2e, _PAGE_PRESENT);
-    }
-
-    /*
-     * STEP 4. Final tidy-up.
-     */
-
-    d->arch.ptwr[which].l1va = 0;
-
-    if ( unlikely(d->arch.ptwr[which].vcpu != current) )
-        write_ptbase(current);
-    else
-        TOGGLE_MODE();
-}
-
 static int ptwr_emulated_update(
     unsigned long addr,
     paddr_t old,
@@ -3388,7 +3155,7 @@ static int ptwr_emulated_update(
     unsigned int bytes,
     unsigned int do_cmpxchg)
 {
-    unsigned long pfn, l1va;
+    unsigned long pfn;
     struct page_info *page;
     l1_pgentry_t pte, ol1e, nl1e, *pl1e;
     struct domain *d = current->domain;
@@ -3426,24 +3193,6 @@ static int ptwr_emulated_update(
         old |= full;
     }
 
-#if 0 /* XXX KAF: I don't think this can happen. */
-    /*
-     * We must not emulate an update to a PTE that is temporarily marked
-     * writable by the batched ptwr logic, else we can corrupt page refcnts!
-     */
-    if ( ((l1va = d->arch.ptwr[PTWR_PT_ACTIVE].l1va) != 0) &&
-         (l1_linear_offset(l1va) == l1_linear_offset(addr)) )
-        ptwr_flush(d, PTWR_PT_ACTIVE);
-    if ( ((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
-         (l1_linear_offset(l1va) == l1_linear_offset(addr)) )
-        ptwr_flush(d, PTWR_PT_INACTIVE);
-#else
-    BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_ACTIVE].l1va) != 0) &&
-           (l1_linear_offset(l1va) == l1_linear_offset(addr)));
-    BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
-           (l1_linear_offset(l1va) == l1_linear_offset(addr)));
-#endif
-
     /* Read the PTE that maps the page being updated. */
     if ( __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
                           sizeof(pte)) )
@@ -3543,239 +3292,53 @@ int ptwr_do_page_fault(struct domain *d,
 int ptwr_do_page_fault(struct domain *d, unsigned long addr,
                        struct cpu_user_regs *regs)
 {
-    unsigned long pfn;
+    unsigned long pfn;
     struct page_info *page;
-    l1_pgentry_t *pl1e, pte;
-    l2_pgentry_t *pl2e, l2e;
-    int which, flags;
-    unsigned long l2_idx;
+    l1_pgentry_t pte;
+    l2_pgentry_t *pl2e, l2e;
     struct x86_emulate_ctxt emul_ctxt;
 
-    ASSERT(!shadow_mode_enabled(d));
+    LOCK_BIGLOCK(d);
 
     /*
      * Attempt to read the PTE that maps the VA being accessed. By checking for
      * PDE validity in the L2 we avoid many expensive fixups in __get_user().
-     * NB. The L2 entry cannot be detached due to existing ptwr work: the
-     * caller already checked that.
      */
     pl2e = &__linear_l2_table[l2_linear_offset(addr)];
     if ( __copy_from_user(&l2e, pl2e, sizeof(l2e)) ||
         !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
         __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
                          sizeof(pte)) )
-    {
-        return 0;
-    }
+        goto bail;
 
     pfn = l1e_get_pfn(pte);
     page = mfn_to_page(pfn);
 
-#ifdef CONFIG_X86_64
-#define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT | _PAGE_USER)
-#else
-#define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT)
-#endif
-
-    /*
-     * Check the required flags for a valid wrpt mapping. If the page is
-     * already writable then we can return straight to the guest (SMP race).
-     * We decide whether or not to propagate the fault by testing for write
-     * permissions in page directories by writing back to the linear mapping.
-     */
-    if ( (flags = l1e_get_flags(pte) & WRPT_PTE_FLAGS) == WRPT_PTE_FLAGS )
-        return __put_user(
-            pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1) ?
-            0 : EXCRET_not_a_fault;
-    /* We are looking only for read-only mappings of p.t. pages. */
-    if ( ((flags | _PAGE_RW) != WRPT_PTE_FLAGS) ||
+    if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
         ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
         ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
         (page_get_owner(page) != d) )
-    {
-        return 0;
-    }
-
-#if 0 /* Leave this in as useful for debugging */
-    goto emulate;
-#endif
-
-    PTWR_PRINTK("ptwr_page_fault on l1 pt at va %lx, pfn %lx, eip %lx\n",
-                addr, pfn, (unsigned long)regs->eip);
-
-    /* Get the L2 index at which this L1 p.t. is always mapped. */
-    l2_idx = page->u.inuse.type_info & PGT_va_mask;
-    if ( unlikely(l2_idx >= PGT_va_unknown) )
-        goto emulate; /* Urk! This L1 is mapped in multiple L2 slots! */
-    l2_idx >>= PGT_va_shift;
-
-    if ( unlikely(l2_idx == l2_linear_offset(addr)) )
-        goto emulate; /* Urk! Pagetable maps itself! */
-
-    /*
-     * Is the L1 p.t. mapped into the current address space? If so we call it
-     * an ACTIVE p.t., otherwise it is INACTIVE.
-     */
-    pl2e = &__linear_l2_table[l2_idx];
-    which = PTWR_PT_INACTIVE;
-
-    if ( (__get_user(l2e.l2, &pl2e->l2) == 0) && (l2e_get_pfn(l2e) == pfn) )
-    {
-        /*
-         * Check the PRESENT bit to set ACTIVE mode.
-         * If the PRESENT bit is clear, we may be conflicting with the current
-         * ACTIVE p.t. (it may be the same p.t. mapped at another virt addr).
-         * The ptwr_flush call below will restore the PRESENT bit.
-         */
-        if ( likely(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
-             (d->arch.ptwr[PTWR_PT_ACTIVE].l1va &&
-              (l2_idx == d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx)) )
-            which = PTWR_PT_ACTIVE;
-    }
-
-    /*
-     * Multi-processor guest? Then ensure that the page table is hooked into
-     * at most one L2, and also ensure that there is only one mapping of the
-     * page table itself (or there can be conflicting writable mappings from
-     * other VCPUs).
-     */
-    if ( d->vcpu[0]->next_in_list != NULL )
-    {
-        if ( /* Hooked into at most one L2 table (which this VCPU maps)? */
-             ((page->u.inuse.type_info & PGT_count_mask) !=
-              (!!(page->u.inuse.type_info & PGT_pinned) +
-               (which == PTWR_PT_ACTIVE))) ||
-             /* PTEs are mapped read-only in only one place? */
-             ((page->count_info & PGC_count_mask) !=
-              (!!(page->count_info & PGC_allocated) + /* alloc count */
-               (page->u.inuse.type_info & PGT_count_mask) + /* type count */
-               1)) ) /* map count */
-        {
-            /* Could be conflicting writable mappings from other VCPUs. */
-            cleanup_writable_pagetable(d);
-            goto emulate;
-        }
-    }
-
-    /*
-     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
-     * time. If there is already one, we must flush it out.
-     */
-    if ( d->arch.ptwr[which].l1va )
-        ptwr_flush(d, which);
-
-    /*
-     * If last batch made no updates then we are probably stuck. Emulate this
-     * update to ensure we make progress.
-     */
-    if ( d->arch.ptwr[which].prev_nr_updates == 0 )
-    {
-        /* Ensure that we don't get stuck in an emulation-only rut. */
-        d->arch.ptwr[which].prev_nr_updates = 1;
-        goto emulate;
-    }
-
-    PTWR_PRINTK("[%c] batched ptwr_page_fault at va %lx, pt for %08lx, "
-                "pfn %lx\n", PTWR_PRINT_WHICH, addr,
-                l2_idx << L2_PAGETABLE_SHIFT, pfn);
-
-    /* For safety, disconnect the L1 p.t. page from current space. */
-    if ( which == PTWR_PT_ACTIVE )
-    {
-        l2e_remove_flags(l2e, _PAGE_PRESENT);
-        if ( unlikely(__copy_to_user(pl2e, &l2e, sizeof(l2e))) )
-        {
-            MEM_LOG("ptwr: Could not unhook l2e at %p", pl2e);
-            domain_crash(d);
-            return 0;
-        }
-        flush_tlb_mask(d->domain_dirty_cpumask);
-    }
-
-    /* Temporarily map the L1 page, and make a copy of it. */
-    pl1e = map_domain_page(pfn);
-    memcpy(d->arch.ptwr[which].page, pl1e, PAGE_SIZE);
-    unmap_domain_page(pl1e);
-
-    /* Finally, make the p.t. page writable by the guest OS. */
-    l1e_add_flags(pte, _PAGE_RW);
-    if ( unlikely(__put_user(pte.l1,
-                             &linear_pg_table[l1_linear_offset(addr)].l1)) )
-    {
-        MEM_LOG("ptwr: Could not update pte at %p",
-                &linear_pg_table[l1_linear_offset(addr)]);
-        domain_crash(d);
-        return 0;
-    }
-
-    /*
-     * Now record the writable pagetable state *after* any accesses that can
-     * cause a recursive page fault (i.e., those via the *_user() accessors).
-     * Otherwise we can enter ptwr_flush() with half-done ptwr state.
-     */
-    d->arch.ptwr[which].l1va = addr | 1;
-    d->arch.ptwr[which].l2_idx = l2_idx;
-    d->arch.ptwr[which].vcpu = current;
-#ifdef PERF_ARRAYS
-    d->arch.ptwr[which].eip = regs->eip;
-#endif
-
-    return EXCRET_fault_fixed;
-
- emulate:
+        goto bail;
+
     emul_ctxt.regs = guest_cpu_user_regs();
     emul_ctxt.cr2 = addr;
     emul_ctxt.mode = X86EMUL_MODE_HOST;
     if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
-        return 0;
+        goto bail;
+
+    UNLOCK_BIGLOCK(d);
     perfc_incrc(ptwr_emulations);
     return EXCRET_fault_fixed;
-}
-
-int ptwr_init(struct domain *d)
-{
-    void *x = alloc_xenheap_page();
-    void *y = alloc_xenheap_page();
-
-    if ( (x == NULL) || (y == NULL) )
-    {
-        free_xenheap_page(x);
-        free_xenheap_page(y);
-        return -ENOMEM;
-    }
-
-    d->arch.ptwr[PTWR_PT_ACTIVE].page = x;
-    d->arch.ptwr[PTWR_PT_INACTIVE].page = y;
-
+
+ bail:
+    UNLOCK_BIGLOCK(d);
     return 0;
 }
 
-void ptwr_destroy(struct domain *d)
-{
-    LOCK_BIGLOCK(d);
-    cleanup_writable_pagetable(d);
-    UNLOCK_BIGLOCK(d);
-    free_xenheap_page(d->arch.ptwr[PTWR_PT_ACTIVE].page);
-    free_xenheap_page(d->arch.ptwr[PTWR_PT_INACTIVE].page);
-}
-
-void cleanup_writable_pagetable(struct domain *d)
-{
-    if ( unlikely(!VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
-        return;
-
-    if ( unlikely(shadow_mode_enabled(d)) )
-    {
-        shadow_sync_all(d);
-    }
-    else
-    {
-        if ( d->arch.ptwr[PTWR_PT_ACTIVE].l1va )
-            ptwr_flush(d, PTWR_PT_ACTIVE);
-        if ( d->arch.ptwr[PTWR_PT_INACTIVE].l1va )
-            ptwr_flush(d, PTWR_PT_INACTIVE);
-    }
+void sync_pagetable_state(struct domain *d)
+{
+    shadow_sync_all(d);
 }
 
 int map_pages_to_xen(
diff -r bb510c274af8 -r 5f92043a3ab1 xen/arch/x86/nmi.c
--- a/xen/arch/x86/nmi.c Fri Aug 11 13:30:48 2006 -0400
+++ b/xen/arch/x86/nmi.c Sun Aug 13 12:00:38 2006 -0400
@@ -36,8 +36,8 @@ static unsigned int nmi_hz = HZ;
 static unsigned int nmi_hz = HZ;
 static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
 static unsigned int nmi_p4_cccr_val;
-static struct timer nmi_timer[NR_CPUS];
-static unsigned int nmi_timer_ticks[NR_CPUS];
+static DEFINE_PER_CPU(struct timer, nmi_timer);
+static DEFINE_PER_CPU(unsigned int, nmi_timer_ticks);
 
 /*
  * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -132,9 +132,8 @@ int __init check_nmi_watchdog (void)
 
 static void nmi_timer_fn(void *unused)
 {
-    int cpu = smp_processor_id();
-    nmi_timer_ticks[cpu]++;
-    set_timer(&nmi_timer[cpu], NOW() + MILLISECS(1000));
+    this_cpu(nmi_timer_ticks)++;
+    set_timer(&this_cpu(nmi_timer), NOW() + MILLISECS(1000));
 }
 
 static void disable_lapic_nmi_watchdog(void)
@@ -340,9 +339,8 @@ void __pminit setup_apic_nmi_watchdog(vo
     nmi_active = 1;
 }
 
-static unsigned int
-last_irq_sums [NR_CPUS],
-    alert_counter [NR_CPUS];
+static DEFINE_PER_CPU(unsigned int, last_irq_sums);
+static DEFINE_PER_CPU(unsigned int, alert_counter);

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel