[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [XenPPC] [xenppc-unstable] [merge] with http://xenbits.xensource.com/xen-unstable.hg
# HG changeset patch # User Jimi Xenidis <jimix@xxxxxxxxxxxxxx> # Node ID 4cffec02b4788bc74d8a0ed7560fadbf332892b1 # Parent 96d6f9cfed6e0736e44454c739eb02ee2d77a2e8 [merge] with http://xenbits.xensource.com/xen-unstable.hg --- xen/arch/x86/audit.c | 984 -- xen/arch/x86/shadow.c | 4199 --------- xen/arch/x86/shadow32.c | 3782 -------- xen/arch/x86/shadow_guest32.c | 16 xen/arch/x86/shadow_guest32pae.c | 16 xen/arch/x86/shadow_public.c | 2138 ---- xen/include/asm-x86/shadow_64.h | 587 - xen/include/asm-x86/shadow_ops.h | 138 xen/include/asm-x86/shadow_public.h | 61 xen/include/xen/font.h | 22 .hgignore | 8 extras/mini-os/console/xencons_ring.c | 8 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 8 linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c | 6 linux-2.6-xen-sparse/arch/ia64/dig/setup.c | 110 linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c | 12 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 28 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c | 5 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 35 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c | 4 linux-2.6-xen-sparse/drivers/xen/blkback/common.h | 8 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 12 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 3 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 12 linux-2.6-xen-sparse/drivers/xen/blktap/common.h | 4 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c | 5 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c | 54 linux-2.6-xen-sparse/drivers/xen/netback/common.h | 9 linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 274 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 39 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 685 - linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c | 1 linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c | 1 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c | 10 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c | 6 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c | 2 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c | 4 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 62 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c | 10 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h | 14 linux-2.6-xen-sparse/include/xen/balloon.h | 2 linux-2.6-xen-sparse/include/xen/hvm.h | 9 linux-2.6-xen-sparse/include/xen/xenbus.h | 3 tools/blktap/lib/Makefile | 2 tools/examples/xmexample.hvm | 4 tools/firmware/rombios/apmbios.S | 3 tools/firmware/rombios/rombios.c | 2 tools/ioemu/Makefile.target | 1 tools/ioemu/hw/pc.c | 3 tools/ioemu/hw/piix_pci.c | 2 tools/ioemu/hw/xen_platform.c | 138 tools/ioemu/patches/acpi-poweroff-support | 2 tools/ioemu/patches/acpi-support | 15 tools/ioemu/patches/acpi-timer-support | 4 tools/ioemu/patches/domain-destroy | 8 tools/ioemu/patches/domain-reset | 10 tools/ioemu/patches/domain-timeoffset | 12 tools/ioemu/patches/hypervisor-pit | 6 tools/ioemu/patches/ide-hd-multithread | 2 tools/ioemu/patches/ioemu-ia64 | 31 tools/ioemu/patches/qemu-allow-disable-sdl | 2 tools/ioemu/patches/qemu-daemonize | 16 tools/ioemu/patches/qemu-fix-memset-args | 2 tools/ioemu/patches/qemu-fix-write-to-disk-synchronous | 12 tools/ioemu/patches/serial-non-block | 2 tools/ioemu/patches/series | 2 tools/ioemu/patches/shadow-vram | 4 tools/ioemu/patches/shared-vram | 14 tools/ioemu/patches/support-xm-console | 2 tools/ioemu/patches/vnc-access-monitor-vt | 2 tools/ioemu/patches/vnc-cleanup | 4 tools/ioemu/patches/vnc-display-find-unused | 10 tools/ioemu/patches/vnc-fixes | 10 tools/ioemu/patches/vnc-start-vncviewer | 10 tools/ioemu/patches/vnc-title-domain-name | 2 tools/ioemu/patches/xen-mm | 36 tools/ioemu/patches/xen-platform-device | 37 tools/ioemu/patches/xen-support-buffered-ioreqs | 26 tools/ioemu/patches/xenstore-block-device-config | 23 tools/ioemu/patches/xenstore-write-vnc-port | 10 tools/ioemu/vl.c | 76 tools/ioemu/vl.h | 4 tools/libaio/src/Makefile | 7 tools/libxc/xc_domain.c | 13 
tools/libxc/xc_hvm_build.c | 176 tools/libxc/xc_linux_build.c | 2 tools/libxc/xc_linux_save.c | 18 tools/libxc/xenctrl.h | 2 tools/misc/xc_shadow.c | 2 tools/python/xen/lowlevel/xc/xc.c | 69 tools/python/xen/xend/XendDomain.py | 24 tools/python/xen/xend/XendDomainInfo.py | 75 tools/python/xen/xend/XendLogging.py | 2 tools/python/xen/xend/image.py | 32 tools/python/xen/xend/server/DevController.py | 22 tools/python/xen/xend/server/XMLRPCServer.py | 4 tools/python/xen/xend/server/blkif.py | 19 tools/python/xen/xm/create.py | 9 tools/python/xen/xm/main.py | 23 tools/xenmon/Makefile | 10 tools/xenstore/Makefile | 2 tools/xentrace/Makefile | 4 unmodified_drivers/linux-2.6/Makefile | 6 unmodified_drivers/linux-2.6/README | 7 unmodified_drivers/linux-2.6/blkfront/Kbuild | 5 unmodified_drivers/linux-2.6/mkbuildtree | 49 unmodified_drivers/linux-2.6/netfront/Kbuild | 4 unmodified_drivers/linux-2.6/overrides.mk | 12 unmodified_drivers/linux-2.6/platform-pci/Kbuild | 7 unmodified_drivers/linux-2.6/platform-pci/evtchn.c | 173 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 271 unmodified_drivers/linux-2.6/platform-pci/platform-pci.h | 45 unmodified_drivers/linux-2.6/platform-pci/xen_support.c | 39 unmodified_drivers/linux-2.6/xenbus/Kbuild | 10 xen/Rules.mk | 6 xen/acm/acm_core.c | 4 xen/acm/acm_simple_type_enforcement_hooks.c | 28 xen/arch/ia64/Rules.mk | 1 xen/arch/ia64/xen/domain.c | 17 xen/arch/ia64/xen/mm.c | 5 xen/arch/x86/Makefile | 16 xen/arch/x86/Rules.mk | 1 xen/arch/x86/acpi/boot.c | 2 xen/arch/x86/apic.c | 12 xen/arch/x86/boot/x86_32.S | 28 xen/arch/x86/cpu/amd.c | 2 xen/arch/x86/cpu/cyrix.c | 2 xen/arch/x86/cpu/transmeta.c | 2 xen/arch/x86/delay.c | 14 xen/arch/x86/dmi_scan.c | 10 xen/arch/x86/dom0_ops.c | 4 xen/arch/x86/domain.c | 109 xen/arch/x86/domain_build.c | 19 xen/arch/x86/extable.c | 4 xen/arch/x86/genapic/bigsmp.c | 2 xen/arch/x86/genapic/es7000.h | 4 xen/arch/x86/genapic/probe.c | 2 xen/arch/x86/hvm/hvm.c | 60 xen/arch/x86/hvm/i8259.c | 6 
xen/arch/x86/hvm/intercept.c | 2 xen/arch/x86/hvm/platform.c | 9 xen/arch/x86/hvm/svm/instrlen.c | 2 xen/arch/x86/hvm/svm/svm.c | 691 - xen/arch/x86/hvm/svm/vmcb.c | 31 xen/arch/x86/hvm/svm/x86_32/exits.S | 12 xen/arch/x86/hvm/svm/x86_64/exits.S | 22 xen/arch/x86/hvm/vioapic.c | 6 xen/arch/x86/hvm/vlapic.c | 3 xen/arch/x86/hvm/vmx/vmcs.c | 17 xen/arch/x86/hvm/vmx/vmx.c | 258 xen/arch/x86/hvm/vmx/x86_32/exits.S | 2 xen/arch/x86/hvm/vmx/x86_64/exits.S | 2 xen/arch/x86/i387.c | 2 xen/arch/x86/io_apic.c | 2 xen/arch/x86/microcode.c | 4 xen/arch/x86/mm.c | 575 - xen/arch/x86/mpparse.c | 2 xen/arch/x86/nmi.c | 8 xen/arch/x86/oprofile/nmi_int.c | 4 xen/arch/x86/oprofile/op_model_p4.c | 2 xen/arch/x86/oprofile/xenoprof.c | 6 xen/arch/x86/setup.c | 4 xen/arch/x86/shadow2-common.c | 3410 +++++++ xen/arch/x86/shadow2.c | 4492 ++++++++++ xen/arch/x86/smpboot.c | 2 xen/arch/x86/traps.c | 44 xen/arch/x86/x86_32/domain_page.c | 33 xen/arch/x86/x86_32/entry.S | 105 xen/arch/x86/x86_32/mm.c | 3 xen/arch/x86/x86_64/entry.S | 50 xen/arch/x86/x86_64/mm.c | 5 xen/arch/x86/x86_64/traps.c | 18 xen/common/acm_ops.c | 1 xen/common/dom0_ops.c | 6 xen/common/domain.c | 2 xen/common/elf.c | 1 xen/common/grant_table.c | 4 xen/common/keyhandler.c | 48 xen/common/memory.c | 3 xen/common/rangeset.c | 1 xen/common/sched_bvt.c | 3 xen/common/sched_credit.c | 2 xen/common/sched_sedf.c | 9 xen/common/schedule.c | 3 xen/common/timer.c | 4 xen/common/trace.c | 1 xen/drivers/Makefile | 2 xen/drivers/char/console.c | 209 xen/drivers/char/serial.c | 1 xen/drivers/video/font.h | 22 xen/drivers/video/font_8x14.c | 2 xen/drivers/video/font_8x16.c | 2 xen/drivers/video/font_8x8.c | 2 xen/drivers/video/vga.c | 220 xen/include/acm/acm_core.h | 2 xen/include/asm-ia64/config.h | 2 xen/include/asm-powerpc/shadow.h | 1 xen/include/asm-x86/acpi.h | 2 xen/include/asm-x86/bitops.h | 18 xen/include/asm-x86/config.h | 24 xen/include/asm-x86/domain.h | 99 xen/include/asm-x86/genapic.h | 8 
xen/include/asm-x86/grant_table.h | 4 xen/include/asm-x86/hvm/hvm.h | 35 xen/include/asm-x86/hvm/support.h | 15 xen/include/asm-x86/hvm/svm/vmcb.h | 11 xen/include/asm-x86/hvm/vcpu.h | 13 xen/include/asm-x86/hvm/vmx/vmcs.h | 4 xen/include/asm-x86/hvm/vmx/vmx.h | 49 xen/include/asm-x86/io.h | 1 xen/include/asm-x86/mach-es7000/mach_mpparse.h | 2 xen/include/asm-x86/mach-generic/mach_mpparse.h | 4 xen/include/asm-x86/mm.h | 145 xen/include/asm-x86/msr.h | 6 xen/include/asm-x86/page-guest32.h | 7 xen/include/asm-x86/page.h | 37 xen/include/asm-x86/perfc_defn.h | 53 xen/include/asm-x86/processor.h | 17 xen/include/asm-x86/shadow.h | 1791 --- xen/include/asm-x86/shadow2-multi.h | 116 xen/include/asm-x86/shadow2-private.h | 593 + xen/include/asm-x86/shadow2-types.h | 705 + xen/include/asm-x86/shadow2.h | 627 + xen/include/asm-x86/string.h | 2 xen/include/asm-x86/uaccess.h | 6 xen/include/asm-x86/x86_32/page-2level.h | 1 xen/include/asm-x86/x86_32/page-3level.h | 3 xen/include/asm-x86/x86_64/page.h | 5 xen/include/public/arch-x86_32.h | 22 xen/include/public/arch-x86_64.h | 22 xen/include/public/dom0_ops.h | 16 xen/include/public/hvm/e820.h | 5 xen/include/public/xen.h | 14 xen/include/xen/domain_page.h | 13 xen/include/xen/gdbstub.h | 10 xen/include/xen/keyhandler.h | 6 xen/include/xen/lib.h | 6 xen/include/xen/list.h | 12 xen/include/xen/mm.h | 3 xen/include/xen/sched.h | 5 xen/include/xen/vga.h | 14 241 files changed, 14239 insertions(+), 16143 deletions(-) diff -r 96d6f9cfed6e -r 4cffec02b478 .hgignore --- a/.hgignore Sun Aug 20 11:07:52 2006 -0400 +++ b/.hgignore Sun Aug 20 11:08:45 2006 -0400 @@ -151,7 +151,7 @@ ^tools/vtpm_manager/manager/vtpm_managerd$ ^tools/xcutils/xc_restore$ ^tools/xcutils/xc_save$ -^tools/xenmon/setmask$ +^tools/xenmon/xentrace_setmask$ ^tools/xenmon/xenbaked$ ^tools/xenstat/xentop/xentop$ ^tools/xenstore/testsuite/tmp/.*$ @@ -172,7 +172,7 @@ ^tools/xenstore/xs_tdb_dump$ ^tools/xenstore/xs_test$ ^tools/xenstore/xs_watch_stress$ 
-^tools/xentrace/setsize$ +^tools/xentrace/xentrace_setsize$ ^tools/xentrace/tbctl$ ^tools/xentrace/xenctx$ ^tools/xentrace/xentrace$ @@ -204,3 +204,7 @@ ^xen/arch/powerpc/firmware$ ^xen/arch/powerpc/firmware_image$ ^xen/arch/powerpc/xen\.lds$ +^unmodified_drivers/linux-2.6/\.tmp_versions +^unmodified_drivers/linux-2.6/.*\.cmd$ +^unmodified_drivers/linux-2.6/.*\.ko$ +^unmodified_drivers/linux-2.6/.*\.mod\.c$ diff -r 96d6f9cfed6e -r 4cffec02b478 extras/mini-os/console/xencons_ring.c --- a/extras/mini-os/console/xencons_ring.c Sun Aug 20 11:07:52 2006 -0400 +++ b/extras/mini-os/console/xencons_ring.c Sun Aug 20 11:08:45 2006 -0400 @@ -14,13 +14,13 @@ static inline struct xencons_interface *xencons_interface(void) { - return mfn_to_virt(start_info.console_mfn); + return mfn_to_virt(start_info.console.domU.mfn); } static inline void notify_daemon(void) { /* Use evtchn: this is called early, before irq is set up. */ - notify_remote_via_evtchn(start_info.console_evtchn); + notify_remote_via_evtchn(start_info.console.domU.evtchn); } int xencons_ring_send_no_notify(const char *data, unsigned len) @@ -80,10 +80,10 @@ int xencons_ring_init(void) { int err; - if (!start_info.console_evtchn) + if (!start_info.console.domU.evtchn) return 0; - err = bind_evtchn(start_info.console_evtchn, handle_input, + err = bind_evtchn(start_info.console.domU.evtchn, handle_input, NULL); if (err <= 0) { printk("XEN console request chn bind failed %i\n", err); diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Sun Aug 20 11:08:45 2006 -0400 @@ -184,7 +184,6 @@ static struct resource code_resource = { .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; -#ifdef CONFIG_XEN_PRIVILEGED_GUEST static struct resource system_rom_resource = { .name = "System ROM", .start = 0xf0000, @@ -240,7 +239,6 @@ static struct resource 
video_rom_resourc .end = 0xc7fff, .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM }; -#endif static struct resource video_ram_resource = { .name = "Video RAM area", @@ -299,7 +297,6 @@ static struct resource standard_io_resou #define STANDARD_IO_RESOURCES \ (sizeof standard_io_resources / sizeof standard_io_resources[0]) -#ifdef CONFIG_XEN_PRIVILEGED_GUEST #define romsignature(x) (*(unsigned short *)(x) == 0xaa55) static int __init romchecksum(unsigned char *rom, unsigned long length) @@ -317,9 +314,11 @@ static void __init probe_roms(void) unsigned char *rom; int i; +#ifdef CONFIG_XEN /* Nothing to do if not running in dom0. */ if (!is_initial_xendomain()) return; +#endif /* video rom */ upper = adapter_rom_resources[0].start; @@ -379,7 +378,6 @@ static void __init probe_roms(void) start = adapter_rom_resources[i++].end & ~2047UL; } } -#endif /* * Point at the empty zero page to start with. We map the real shared_info @@ -1359,9 +1357,7 @@ legacy_init_iomem_resources(struct e820e { int i; -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN) probe_roms(); -#endif for (i = 0; i < nr_map; i++) { struct resource *res; diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c --- a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c Sun Aug 20 11:08:45 2006 -0400 @@ -95,7 +95,10 @@ static struct irq_routing_table * __init u8 *addr; struct irq_routing_table *rt; -#ifdef CONFIG_XEN_PRIVILEGED_GUEST +#ifdef CONFIG_XEN + if (!is_initial_xendomain()) + return NULL; +#endif if (pirq_table_addr) { rt = pirq_check_routing_table((u8 *) isa_bus_to_virt(pirq_table_addr)); if (rt) @@ -107,7 +110,6 @@ static struct irq_routing_table * __init if (rt) return rt; } -#endif return NULL; } diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c Sun Aug 20 11:07:52 
2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c Sun Aug 20 11:08:45 2006 -0400 @@ -17,14 +17,8 @@ #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/init.h> -#ifdef CONFIG_XEN_PRIVILEGED_GUEST #include <asm/smp.h> #include <asm/ipi.h> -#else -#include <asm/apic.h> -#include <asm/apicdef.h> -#include <asm/genapic.h> -#endif #include <xen/evtchn.h> DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); @@ -118,14 +112,12 @@ static void xen_send_IPI_mask(cpumask_t local_irq_restore(flags); } -#ifdef CONFIG_XEN_PRIVILEGED_GUEST static int xen_apic_id_registered(void) { /* better be set */ Dprintk("%s\n", __FUNCTION__); return physid_isset(smp_processor_id(), phys_cpu_present_map); } -#endif static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask) { @@ -144,15 +136,11 @@ static unsigned int phys_pkg_id(int inde struct genapic apic_xen = { .name = "xen", -#ifdef CONFIG_XEN_PRIVILEGED_GUEST .int_delivery_mode = dest_LowestPrio, -#endif .int_dest_mode = (APIC_DEST_LOGICAL != 0), .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST, .target_cpus = xen_target_cpus, -#ifdef CONFIG_XEN_PRIVILEGED_GUEST .apic_id_registered = xen_apic_id_registered, -#endif .init_apic_ldr = xen_init_apic_ldr, .send_IPI_all = xen_send_IPI_all, .send_IPI_allbutself = xen_send_IPI_allbutself, diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Sun Aug 20 11:08:45 2006 -0400 @@ -189,7 +189,6 @@ struct resource code_resource = { #define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM) -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN) static struct resource system_rom_resource = { .name = "System ROM", .start = 0xf0000, @@ -218,19 +217,16 @@ static struct resource adapter_rom_resou { .name = "Adapter ROM", .start = 0, .end = 0, .flags = 
IORESOURCE_ROM } }; -#endif #define ADAPTER_ROM_RESOURCES \ (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN) static struct resource video_rom_resource = { .name = "Video ROM", .start = 0xc0000, .end = 0xc7fff, .flags = IORESOURCE_ROM, }; -#endif static struct resource video_ram_resource = { .name = "Video RAM area", @@ -239,7 +235,6 @@ static struct resource video_ram_resourc .flags = IORESOURCE_RAM, }; -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN) #define romsignature(x) (*(unsigned short *)(x) == 0xaa55) static int __init romchecksum(unsigned char *rom, unsigned long length) @@ -256,6 +251,12 @@ static void __init probe_roms(void) unsigned long start, length, upper; unsigned char *rom; int i; + +#ifdef CONFIG_XEN + /* Nothing to do if not running in dom0. */ + if (!is_initial_xendomain()) + return; +#endif /* video rom */ upper = adapter_rom_resources[0].start; @@ -315,7 +316,6 @@ static void __init probe_roms(void) start = adapter_rom_resources[i++].end & ~2047UL; } } -#endif static __init void parse_cmdline_early (char ** cmdline_p) { @@ -625,11 +625,8 @@ void __init setup_arch(char **cmdline_p) void __init setup_arch(char **cmdline_p) { unsigned long kernel_end; - -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) struct e820entry *machine_e820; struct xen_memory_map memmap; -#endif #ifdef CONFIG_XEN /* Register a call for panic conditions. */ @@ -936,8 +933,8 @@ void __init setup_arch(char **cmdline_p) * Request address space for all standard RAM and ROM resources * and also for regions reported as reserved by the e820. 
*/ -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) probe_roms(); +#ifdef CONFIG_XEN if (is_initial_xendomain()) { machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE); @@ -948,13 +945,8 @@ void __init setup_arch(char **cmdline_p) e820_reserve_resources(machine_e820, memmap.nr_entries); } else - e820_reserve_resources(e820.map, e820.nr_map); -#elif defined(CONFIG_XEN) +#endif e820_reserve_resources(e820.map, e820.nr_map); -#else - probe_roms(); - e820_reserve_resources(e820.map, e820.nr_map); -#endif request_resource(&iomem_resource, &video_ram_resource); @@ -965,12 +957,12 @@ void __init setup_arch(char **cmdline_p) request_resource(&ioport_resource, &standard_io_resources[i]); } -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) +#ifdef CONFIG_XEN if (is_initial_xendomain()) { e820_setup_gap(machine_e820, memmap.nr_entries); free_bootmem(__pa(machine_e820), PAGE_SIZE); } -#elif !defined(CONFIG_XEN) +#else e820_setup_gap(e820.map, e820.nr_map); #endif diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Sun Aug 20 11:08:45 2006 -0400 @@ -343,7 +343,6 @@ static void backend_changed(struct xenbu case XenbusStateInitialising: case XenbusStateInitWait: case XenbusStateInitialised: - case XenbusStateUnknown: break; case XenbusStateConnected: @@ -354,10 +353,10 @@ static void backend_changed(struct xenbu tpmif_set_connected_state(tp, 0); break; + case XenbusStateUnknown: case XenbusStateClosed: - if (tp->is_suspended == 0) { + if (tp->is_suspended == 0) device_unregister(&dev->dev); - } xenbus_switch_state(dev, XenbusStateClosed); break; } diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Sun Aug 20 11:08:45 2006 -0400 @@ -76,7 +76,7 @@ 
static unsigned long target_pages; static unsigned long target_pages; /* We increase/decrease in batches which fit in a page */ -static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; +static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; /* VM /proc information for memory */ extern unsigned long totalram_pages; @@ -440,20 +440,16 @@ static int balloon_read(char *page, char "Requested target: %8lu kB\n" "Low-mem balloon: %8lu kB\n" "High-mem balloon: %8lu kB\n" + "Driver pages: %8lu kB\n" "Xen hard limit: ", PAGES2KB(current_pages), PAGES2KB(target_pages), - PAGES2KB(balloon_low), PAGES2KB(balloon_high)); - - if (hard_limit != ~0UL) { - len += sprintf( - page + len, - "%8lu kB (inc. %8lu kB driver headroom)\n", - PAGES2KB(hard_limit), PAGES2KB(driver_pages)); - } else { - len += sprintf( - page + len, - " ??? kB\n"); - } + PAGES2KB(balloon_low), PAGES2KB(balloon_high), + PAGES2KB(driver_pages)); + + if (hard_limit != ~0UL) + len += sprintf(page + len, "%8lu kB\n", PAGES2KB(hard_limit)); + else + len += sprintf(page + len, " ??? 
kB\n"); *eof = 1; return len; @@ -610,8 +606,21 @@ void balloon_dealloc_empty_page_range( schedule_work(&balloon_worker); } +void balloon_release_driver_page(struct page *page) +{ + unsigned long flags; + + balloon_lock(flags); + balloon_append(page); + driver_pages--; + balloon_unlock(flags); + + schedule_work(&balloon_worker); +} + EXPORT_SYMBOL_GPL(balloon_update_driver_allowance); EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range); EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range); +EXPORT_SYMBOL_GPL(balloon_release_driver_page); MODULE_LICENSE("Dual BSD/GPL"); diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Sun Aug 20 11:08:45 2006 -0400 @@ -341,7 +341,7 @@ static void dispatch_rw_block_io(blkif_t blkif_request_t *req, pending_req_t *pending_req) { - extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ; struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct phys_req preq; @@ -409,7 +409,7 @@ static void dispatch_rw_block_io(blkif_t DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", operation == READ ? 
"read" : "write", preq.sector_number, - preq.sector_number + preq.nr_sects, preq.dev); + preq.sector_number + preq.nr_sects, preq.dev); goto fail_flush; } diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blkback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Sun Aug 20 11:08:45 2006 -0400 @@ -55,9 +55,9 @@ struct vbd { unsigned char type; /* VDISK_xxx */ u32 pdevice; /* phys device that this vbd maps to */ struct block_device *bdev; -}; +}; -struct backend_info; +struct backend_info; typedef struct blkif_st { /* Unique identifier for this interface. */ @@ -72,7 +72,7 @@ typedef struct blkif_st { /* The VBD attached to this interface. */ struct vbd vbd; /* Back pointer to the backend_info. */ - struct backend_info *be; + struct backend_info *be; /* Private fields. */ spinlock_t blk_ring_lock; atomic_t refcnt; @@ -122,7 +122,7 @@ struct phys_req { blkif_sector_t sector_number; }; -int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); void blkif_interface_init(void); diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Sun Aug 20 11:08:45 2006 -0400 @@ -194,7 +194,7 @@ static int blkback_probe(struct xenbus_d } /* setup back pointer */ - be->blkif->be = be; + be->blkif->be = be; err = xenbus_watch_path2(dev, dev->nodename, "physical-device", &be->backend_watch, backend_changed); @@ -287,7 +287,7 @@ static void backend_changed(struct xenbu } /* We're potentially connected now */ - update_blkif_status(be->blkif); + update_blkif_status(be->blkif); } } @@ -305,6 +305,11 @@ static void frontend_changed(struct xenb switch (frontend_state) { case XenbusStateInitialising: + if (dev->state 
== XenbusStateClosing) { + printk("%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); + xenbus_switch_state(dev, XenbusStateInitWait); + } break; case XenbusStateInitialised: @@ -326,12 +331,11 @@ static void frontend_changed(struct xenb xenbus_switch_state(dev, XenbusStateClosing); break; + case XenbusStateUnknown: case XenbusStateClosed: device_unregister(&dev->dev); break; - case XenbusStateUnknown: - case XenbusStateInitWait: default: xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", frontend_state); diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Sun Aug 20 11:08:45 2006 -0400 @@ -46,6 +46,7 @@ #include <xen/interface/grant_table.h> #include <xen/gnttab.h> #include <asm/hypervisor.h> +#include <asm/maddr.h> #define BLKIF_STATE_DISCONNECTED 0 #define BLKIF_STATE_CONNECTED 1 @@ -255,10 +256,10 @@ static void backend_changed(struct xenbu DPRINTK("blkfront:backend_changed.\n"); switch (backend_state) { - case XenbusStateUnknown: case XenbusStateInitialising: case XenbusStateInitWait: case XenbusStateInitialised: + case XenbusStateUnknown: case XenbusStateClosed: break; diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sun Aug 20 11:08:45 2006 -0400 @@ -157,7 +157,7 @@ typedef unsigned int PEND_RING_IDX; typedef unsigned int PEND_RING_IDX; static inline int MASK_PEND_IDX(int i) { - return (i & (MAX_PENDING_REQS-1)); + return (i & (MAX_PENDING_REQS-1)); } static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) { @@ -754,7 +754,7 @@ static int req_increase(void) if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) { kfree(pending_reqs[mmap_alloc]); 
kfree(pending_addrs[mmap_alloc]); - WPRINTK("%s: out of memory\n", __FUNCTION__); + WPRINTK("%s: out of memory\n", __FUNCTION__); ret = -ENOMEM; goto done; } @@ -1051,7 +1051,7 @@ static int blktap_read_ufe_ring(int idx) unsigned long kvaddr, uvaddr; struct page **map = info->vma->vm_private_data; struct page *pg; - int offset; + int offset; uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j); kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, @@ -1063,7 +1063,7 @@ static int blktap_read_ufe_ring(int idx) >> PAGE_SHIFT; map[offset] = NULL; } - fast_flush_area(pending_req, pending_idx, usr_idx, idx); + fast_flush_area(pending_req, pending_idx, usr_idx, idx); make_response(blkif, pending_req->id, resp->operation, resp->status); info->idx_map[usr_idx] = INVALID_REQ; @@ -1118,7 +1118,7 @@ static int do_block_io_op(blkif_t *blkif "ring does not exist!\n"); print_dbug = 0; /*We only print this message once*/ } - return 1; + return 1; } info = tapfds[blkif->dev_num]; @@ -1185,7 +1185,7 @@ static void dispatch_rw_block_io(blkif_t blkif_request_t *req, pending_req_t *pending_req) { - extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ; struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; unsigned int nseg; diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blktap/common.h --- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Sun Aug 20 11:08:45 2006 -0400 @@ -49,7 +49,7 @@ #define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args) -struct backend_info; +struct backend_info; typedef struct blkif_st { /* Unique identifier for this interface. */ @@ -62,7 +62,7 @@ typedef struct blkif_st { blkif_back_ring_t blk_ring; struct vm_struct *blk_ring_area; /* Back pointer to the backend_info. 
*/ - struct backend_info *be; + struct backend_info *be; /* Private fields. */ spinlock_t blk_ring_lock; atomic_t refcnt; diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Sun Aug 20 11:08:45 2006 -0400 @@ -174,7 +174,7 @@ static int blktap_probe(struct xenbus_de } /* setup back pointer */ - be->blkif->be = be; + be->blkif->be = be; be->blkif->sectors = 0; /* set a watch on disk info, waiting for userspace to update details*/ @@ -267,12 +267,11 @@ static void tap_frontend_changed(struct xenbus_switch_state(dev, XenbusStateClosing); break; + case XenbusStateUnknown: case XenbusStateClosed: device_unregister(&dev->dev); break; - case XenbusStateUnknown: - case XenbusStateInitWait: default: xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", frontend_state); diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Sun Aug 20 11:08:45 2006 -0400 @@ -41,6 +41,8 @@ #include <asm/pgtable.h> #include <asm/uaccess.h> #include <asm/synch_bitops.h> +#include <asm/io.h> +#include <xen/interface/memory.h> /* External tools reserve first few grant table entries. 
*/ #define NR_RESERVED_ENTRIES 8 @@ -350,6 +352,8 @@ void gnttab_cancel_free_callback(struct } EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback); +#ifdef CONFIG_XEN + #ifndef __ia64__ static int map_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) @@ -410,17 +414,53 @@ int gnttab_resume(void) int gnttab_suspend(void) { - #ifndef __ia64__ apply_to_page_range(&init_mm, (unsigned long)shared, PAGE_SIZE * NR_GRANT_FRAMES, unmap_pte_fn, NULL); #endif - - return 0; -} - -static int __init gnttab_init(void) + return 0; +} + +#else /* !CONFIG_XEN */ + +#include <platform-pci.h> + +int gnttab_resume(void) +{ + unsigned long frames; + struct xen_add_to_physmap xatp; + unsigned int i; + + frames = alloc_xen_mmio(PAGE_SIZE * NR_GRANT_FRAMES); + + for (i = 0; i < NR_GRANT_FRAMES; i++) { + xatp.domid = DOMID_SELF; + xatp.idx = i; + xatp.space = XENMAPSPACE_grant_table; + xatp.gpfn = (frames >> PAGE_SHIFT) + i; + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) + BUG(); + } + + shared = ioremap(frames, PAGE_SIZE * NR_GRANT_FRAMES); + if (shared == NULL) { + printk("error to ioremap gnttab share frames\n"); + return -1; + } + + return 0; +} + +int gnttab_suspend(void) +{ + iounmap(shared); + return 0; +} + +#endif /* !CONFIG_XEN */ + +int __init gnttab_init(void) { int i; @@ -439,4 +479,6 @@ static int __init gnttab_init(void) return 0; } +#ifdef CONFIG_XEN core_initcall(gnttab_init); +#endif diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/netback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Sun Aug 20 11:08:45 2006 -0400 @@ -64,9 +64,9 @@ typedef struct netif_st { /* Physical parameters of the comms window. 
*/ grant_handle_t tx_shmem_handle; - grant_ref_t tx_shmem_ref; + grant_ref_t tx_shmem_ref; grant_handle_t rx_shmem_handle; - grant_ref_t rx_shmem_ref; + grant_ref_t rx_shmem_ref; unsigned int evtchn; unsigned int irq; @@ -78,7 +78,10 @@ typedef struct netif_st { /* Set of features that can be turned on in dev->features. */ int features; - int can_queue; + + /* Internal feature information. */ + int can_queue:1; /* can queue packets for receiver? */ + int copying_receiver:1; /* copy packets to receiver? */ /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */ RING_IDX rx_req_cons_peek; diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sun Aug 20 11:08:45 2006 -0400 @@ -43,6 +43,7 @@ struct netbk_rx_meta { struct netbk_rx_meta { skb_frag_t frag; int id; + int copy:1; }; static void netif_idx_release(u16 pending_idx); @@ -72,6 +73,8 @@ static unsigned long mmap_vstart; static unsigned long mmap_vstart; #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE)) +static void *rx_mmap_area; + #define PKT_PROT_LEN 64 static struct { @@ -277,12 +280,11 @@ int netif_be_start_xmit(struct sk_buff * goto drop; } - /* - * We do not copy the packet unless: - * 1. The data -- including any in fragments -- is shared; or - * 2. The data is not allocated from our special cache. - */ - if (!is_flippable_skb(skb)) { + /* Copy the packet here if it's destined for a flipping + interface but isn't flippable (e.g. 
extra references to + data) + */ + if (!netif->copying_receiver && !is_flippable_skb(skb)) { struct sk_buff *nskb = netbk_copy_skb(skb); if ( unlikely(nskb == NULL) ) goto drop; @@ -328,7 +330,7 @@ static void xen_network_done_notify(void /* * Add following to poll() function in NAPI driver (Tigon3 is example): * if ( xen_network_done() ) - * tg3_enable_ints(tp); + * tg3_enable_ints(tp); */ int xen_network_done(void) { @@ -340,49 +342,74 @@ struct netrx_pending_operations { unsigned trans_prod, trans_cons; unsigned mmu_prod, mmu_cons; unsigned mcl_prod, mcl_cons; + unsigned copy_prod, copy_cons; unsigned meta_prod, meta_cons; mmu_update_t *mmu; gnttab_transfer_t *trans; + gnttab_copy_t *copy; multicall_entry_t *mcl; struct netbk_rx_meta *meta; }; -static u16 netbk_gop_frag(netif_t *netif, struct page *page, - int i, struct netrx_pending_operations *npo) +/* Set up the grant operations for this fragment. If it's a flipping + interface, we also set up the unmap request from here. */ +static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta, + int i, struct netrx_pending_operations *npo, + struct page *page, unsigned long size, + unsigned long offset) { mmu_update_t *mmu; gnttab_transfer_t *gop; + gnttab_copy_t *copy_gop; multicall_entry_t *mcl; netif_rx_request_t *req; unsigned long old_mfn, new_mfn; old_mfn = virt_to_mfn(page_address(page)); - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - new_mfn = alloc_mfn(); - - /* - * Set the new P2M table entry before reassigning - * the old data page. Heed the comment in - * pgtable-2level.h:pte_page(). 
:-) - */ - set_phys_to_machine(page_to_pfn(page), new_mfn); - - mcl = npo->mcl + npo->mcl_prod++; - MULTI_update_va_mapping(mcl, (unsigned long)page_address(page), - pfn_pte_ma(new_mfn, PAGE_KERNEL), 0); - - mmu = npo->mmu + npo->mmu_prod++; - mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | - MMU_MACHPHYS_UPDATE; - mmu->val = page_to_pfn(page); - } - req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i); - gop = npo->trans + npo->trans_prod++; - gop->mfn = old_mfn; - gop->domid = netif->domid; - gop->ref = req->gref; + if (netif->copying_receiver) { + /* The fragment needs to be copied rather than + flipped. */ + meta->copy = 1; + copy_gop = npo->copy + npo->copy_prod++; + copy_gop->source.domid = DOMID_SELF; + copy_gop->source.offset = offset; + copy_gop->source.u.gmfn = old_mfn; + copy_gop->dest.domid = netif->domid; + copy_gop->dest.offset = 0; + copy_gop->dest.u.ref = req->gref; + copy_gop->len = size; + copy_gop->flags = GNTCOPY_dest_gref; + } else { + meta->copy = 0; + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + new_mfn = alloc_mfn(); + + /* + * Set the new P2M table entry before + * reassigning the old data page. Heed the + * comment in pgtable-2level.h:pte_page(). 
:-) + */ + set_phys_to_machine(page_to_pfn(page), new_mfn); + + mcl = npo->mcl + npo->mcl_prod++; + MULTI_update_va_mapping(mcl, + (unsigned long)page_address(page), + pfn_pte_ma(new_mfn, PAGE_KERNEL), + 0); + + mmu = npo->mmu + npo->mmu_prod++; + mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | + MMU_MACHPHYS_UPDATE; + mmu->val = page_to_pfn(page); + } + + gop = npo->trans + npo->trans_prod++; + gop->mfn = old_mfn; + gop->domid = netif->domid; + gop->ref = req->gref; + } return req->id; } @@ -403,18 +430,21 @@ static void netbk_gop_skb(struct sk_buff for (i = 0; i < nr_frags; i++) { meta = npo->meta + npo->meta_prod++; meta->frag = skb_shinfo(skb)->frags[i]; - meta->id = netbk_gop_frag(netif, meta->frag.page, - i + extra, npo); + meta->id = netbk_gop_frag(netif, meta, i + extra, npo, + meta->frag.page, + meta->frag.size, + meta->frag.page_offset); } /* * This must occur at the end to ensure that we don't trash * skb_shinfo until we're done. */ - head_meta->id = netbk_gop_frag(netif, + head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo, virt_to_page(skb->data), - 0, - npo); + skb_headlen(skb), + offset_in_page(skb->data)); + netif->rx.req_cons += nr_frags + extra; } @@ -430,32 +460,43 @@ static inline void netbk_free_pages(int used to set up the operations on the top of netrx_pending_operations, which have since been done. Check that they didn't give any errors and advance over them. */ -static int netbk_check_gop(int nr_frags, domid_t domid, int count, +static int netbk_check_gop(int nr_frags, domid_t domid, struct netrx_pending_operations *npo) { multicall_entry_t *mcl; gnttab_transfer_t *gop; + gnttab_copy_t *copy_op; int status = NETIF_RSP_OKAY; int i; for (i = 0; i <= nr_frags; i++) { - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - mcl = npo->mcl + npo->mcl_cons++; - /* The update_va_mapping() must not fail. */ - BUG_ON(mcl->result != 0); - } - - gop = npo->trans + npo->trans_cons++; - /* Check the reassignment error code. 
*/ - if (gop->status != 0) { - DPRINTK("Bad status %d from grant transfer to DOM%u\n", - gop->status, domid); - /* - * Page no longer belongs to us unless GNTST_bad_page, - * but that should be a fatal error anyway. - */ - BUG_ON(gop->status == GNTST_bad_page); - status = NETIF_RSP_ERROR; + if (npo->meta[npo->meta_cons + i].copy) { + copy_op = npo->copy + npo->copy_cons++; + if (copy_op->status != GNTST_okay) { + DPRINTK("Bad status %d from copy to DOM%d.\n", + gop->status, domid); + status = NETIF_RSP_ERROR; + } + } else { + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + mcl = npo->mcl + npo->mcl_cons++; + /* The update_va_mapping() must not fail. */ + BUG_ON(mcl->result != 0); + } + + gop = npo->trans + npo->trans_cons++; + /* Check the reassignment error code. */ + if (gop->status != 0) { + DPRINTK("Bad status %d from grant transfer to DOM%u\n", + gop->status, domid); + /* + * Page no longer belongs to us unless + * GNTST_bad_page, but that should be + * a fatal error anyway. + */ + BUG_ON(gop->status == GNTST_bad_page); + status = NETIF_RSP_ERROR; + } } } @@ -466,23 +507,27 @@ static void netbk_add_frag_responses(net struct netbk_rx_meta *meta, int nr_frags) { int i; + unsigned long offset; for (i = 0; i < nr_frags; i++) { int id = meta[i].id; int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data; - make_rx_response(netif, id, status, meta[i].frag.page_offset, + if (meta[i].copy) + offset = 0; + else + offset = meta[i].frag.page_offset; + make_rx_response(netif, id, status, offset, meta[i].frag.size, flags); } } static void net_rx_action(unsigned long unused) { - netif_t *netif = NULL; + netif_t *netif = NULL; s8 status; u16 id, irq, flags; netif_rx_response_t *resp; - struct netif_extra_info *extra; multicall_entry_t *mcl; struct sk_buff_head rxq; struct sk_buff *skb; @@ -490,6 +535,7 @@ static void net_rx_action(unsigned long int ret; int nr_frags; int count; + unsigned long offset; /* * Putting hundreds of bytes on the stack is considered rude. 
@@ -497,14 +543,16 @@ static void net_rx_action(unsigned long */ static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3]; static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; - static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE]; + static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE]; + static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE]; static unsigned char rx_notify[NR_IRQS]; static u16 notify_list[NET_RX_RING_SIZE]; static struct netbk_rx_meta meta[NET_RX_RING_SIZE]; struct netrx_pending_operations npo = { mmu: rx_mmu, - trans: grant_rx_op, + trans: grant_trans_op, + copy: grant_copy_op, mcl: rx_mcl, meta: meta}; @@ -538,12 +586,8 @@ static void net_rx_action(unsigned long break; } - if (!count) - return; - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - BUG_ON(npo.mcl_prod == 0); - + if (npo.mcl_prod && + !xen_feature(XENFEAT_auto_translated_physmap)) { mcl = npo.mcl + npo.mcl_prod++; BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping); @@ -551,36 +595,63 @@ static void net_rx_action(unsigned long mcl->op = __HYPERVISOR_mmu_update; mcl->args[0] = (unsigned long)rx_mmu; - mcl->args[1] = count; + mcl->args[1] = npo.mmu_prod; mcl->args[2] = 0; mcl->args[3] = DOMID_SELF; } - mcl = npo.mcl + npo.mcl_prod++; - mcl->op = __HYPERVISOR_grant_table_op; - mcl->args[0] = GNTTABOP_transfer; - mcl->args[1] = (unsigned long)grant_rx_op; - mcl->args[2] = npo.trans_prod; + if (npo.trans_prod) { + mcl = npo.mcl + npo.mcl_prod++; + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = GNTTABOP_transfer; + mcl->args[1] = (unsigned long)grant_trans_op; + mcl->args[2] = npo.trans_prod; + } + + if (npo.copy_prod) { + mcl = npo.mcl + npo.mcl_prod++; + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = GNTTABOP_copy; + mcl->args[1] = (unsigned long)grant_copy_op; + mcl->args[2] = npo.copy_prod; + } + + /* Nothing to do? 
*/ + if (!npo.mcl_prod) + return; + + BUG_ON(npo.copy_prod > NET_RX_RING_SIZE); + BUG_ON(npo.mmu_prod > NET_RX_RING_SIZE); + BUG_ON(npo.trans_prod > NET_RX_RING_SIZE); + BUG_ON(npo.mcl_prod > NET_RX_RING_SIZE+3); + BUG_ON(npo.meta_prod > NET_RX_RING_SIZE); ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod); BUG_ON(ret != 0); - BUG_ON(mcl->result != 0); - - count = 0; + while ((skb = __skb_dequeue(&rxq)) != NULL) { nr_frags = *(int *)skb->cb; - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - netif = netdev_priv(skb->dev); + /* We can't rely on skb_release_data to release the + pages used by fragments for us, since it tries to + touch the pages in the fraglist. If we're in + flipping mode, that doesn't work. In copying mode, + we still have access to all of the pages, and so + it's safe to let release_data deal with it. */ + /* (Freeing the fragments is safe since we copy + non-linear skbs destined for flipping interfaces) */ + if (!netif->copying_receiver) { + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->frag_list = NULL; + skb_shinfo(skb)->nr_frags = 0; + netbk_free_pages(nr_frags, meta + npo.meta_cons + 1); + } + netif->stats.tx_bytes += skb->len; netif->stats.tx_packets++; - netbk_free_pages(nr_frags, meta + npo.meta_cons + 1); - status = netbk_check_gop(nr_frags, netif->domid, count, - &npo); + status = netbk_check_gop(nr_frags, netif->domid, &npo); id = meta[npo.meta_cons].id; flags = nr_frags ? NETRXF_more_data : 0; @@ -590,11 +661,12 @@ static void net_rx_action(unsigned long else if (skb->proto_data_valid) /* remote but checksummed? 
*/ flags |= NETRXF_data_validated; - resp = make_rx_response(netif, id, status, - offset_in_page(skb->data), + if (meta[npo.meta_cons].copy) + offset = 0; + else + offset = offset_in_page(skb->data); + resp = make_rx_response(netif, id, status, offset, skb_headlen(skb), flags); - - extra = NULL; if (meta[npo.meta_cons].frag.size) { struct netif_extra_info *gso = @@ -602,10 +674,7 @@ static void net_rx_action(unsigned long RING_GET_RESPONSE(&netif->rx, netif->rx.rsp_prod_pvt++); - if (extra) - extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; - else - resp->flags |= NETRXF_extra_info; + resp->flags |= NETRXF_extra_info; gso->u.gso.size = meta[npo.meta_cons].frag.size; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; @@ -614,7 +683,6 @@ static void net_rx_action(unsigned long gso->type = XEN_NETIF_EXTRA_TYPE_GSO; gso->flags = 0; - extra = gso; } netbk_add_frag_responses(netif, status, @@ -634,7 +702,6 @@ static void net_rx_action(unsigned long netif_put(netif); dev_kfree_skb(skb); - npo.meta_cons += nr_frags + 1; } @@ -1095,7 +1162,7 @@ static void net_tx_action(unsigned long if (unlikely(txreq.size < ETH_HLEN)) { DPRINTK("Bad packet size: %d\n", txreq.size); netbk_tx_err(netif, &txreq, i); - continue; + continue; } /* No crossing a page as the payload mustn't fragment. */ @@ -1151,6 +1218,9 @@ static void net_tx_action(unsigned long skb_shinfo(skb)->nr_frags++; skb_shinfo(skb)->frags[0].page = (void *)(unsigned long)pending_idx; + } else { + /* Discriminate from any valid pending_idx value. */ + skb_shinfo(skb)->frags[0].page = (void *)~0UL; } __skb_queue_tail(&tx_queue, skb); @@ -1251,6 +1321,12 @@ static void netif_page_release(struct pa set_page_count(page, 1); netif_idx_release(pending_idx); +} + +static void netif_rx_page_release(struct page *page) +{ + /* Ready for next use. 
*/ + set_page_count(page, 1); } irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) @@ -1383,6 +1459,16 @@ static int __init netback_init(void) SetPageForeign(page, netif_page_release); } + page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE); + BUG_ON(page == NULL); + rx_mmap_area = pfn_to_kaddr(page_to_pfn(page)); + + for (i = 0; i < NET_RX_RING_SIZE; i++) { + page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE)); + set_page_count(page, 1); + SetPageForeign(page, netif_rx_page_release); + } + pending_cons = 0; pending_prod = MAX_PENDING_REQS; for (i = 0; i < MAX_PENDING_REQS; i++) diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Sun Aug 20 11:08:45 2006 -0400 @@ -108,6 +108,12 @@ static int netback_probe(struct xenbus_d goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1); + if (err) { + message = "writing feature-copying"; + goto abort_transaction; + } + err = xenbus_transaction_end(xbt, 0); } while (err == -EAGAIN); @@ -228,10 +234,25 @@ static void frontend_changed(struct xenb switch (frontend_state) { case XenbusStateInitialising: + if (dev->state == XenbusStateClosing) { + printk("%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); + if (be->netif) { + netif_disconnect(be->netif); + be->netif = NULL; + } + xenbus_switch_state(dev, XenbusStateInitWait); + } + break; + case XenbusStateInitialised: break; case XenbusStateConnected: + if (!be->netif) { + /* reconnect: setup be->netif */ + backend_changed(&be->backend_watch, NULL, 0); + } maybe_connect(be); break; @@ -239,14 +260,13 @@ static void frontend_changed(struct xenb xenbus_switch_state(dev, XenbusStateClosing); break; + case XenbusStateUnknown: case XenbusStateClosed: if (be->netif != NULL) kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); 
device_unregister(&dev->dev); break; - case XenbusStateUnknown: - case XenbusStateInitWait: default: xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", frontend_state); @@ -349,7 +369,7 @@ static int connect_rings(struct backend_ { struct xenbus_device *dev = be->dev; unsigned long tx_ring_ref, rx_ring_ref; - unsigned int evtchn; + unsigned int evtchn, rx_copy; int err; int val; @@ -365,6 +385,19 @@ static int connect_rings(struct backend_ dev->otherend); return err; } + + err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u", + &rx_copy); + if (err == -ENOENT) { + err = 0; + rx_copy = 0; + } + if (err < 0) { + xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy", + dev->otherend); + return err; + } + be->netif->copying_receiver = !!rx_copy; if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d", &val) < 0) diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Sun Aug 20 11:08:45 2006 -0400 @@ -58,12 +58,27 @@ #include <xen/interface/memory.h> #include <xen/balloon.h> #include <asm/page.h> +#include <asm/maddr.h> #include <asm/uaccess.h> #include <xen/interface/grant_table.h> #include <xen/gnttab.h> #define RX_COPY_THRESHOLD 256 +/* If we don't have GSO, fake things up so that we never try to use it. */ +#ifndef NETIF_F_GSO +#define netif_needs_gso(dev, skb) 0 +#define dev_disable_gso_features(dev) ((void)0) +#else +#define HAVE_GSO 1 +static inline void dev_disable_gso_features(struct net_device *dev) +{ + /* Turn off all GSO bits except ROBUST. 
*/ + dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1; + dev->features |= NETIF_F_GSO_ROBUST; +} +#endif + #define GRANT_INVALID_REF 0 #define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE) @@ -83,6 +98,7 @@ struct netfront_info { unsigned int handle; unsigned int evtchn, irq; + unsigned int copying_receiver; /* Receive-ring batched refills. */ #define RX_MIN_TARGET 8 @@ -171,7 +187,7 @@ static inline grant_ref_t xennet_get_rx_ static int talk_to_backend(struct xenbus_device *, struct netfront_info *); static int setup_device(struct xenbus_device *, struct netfront_info *); -static struct net_device *create_netdev(int, struct xenbus_device *); +static struct net_device *create_netdev(int, int, struct xenbus_device *); static void netfront_closing(struct xenbus_device *); @@ -213,6 +229,7 @@ static int __devinit netfront_probe(stru struct net_device *netdev; struct netfront_info *info; unsigned int handle; + unsigned feature_rx_copy; err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle); if (err != 1) { @@ -220,7 +237,22 @@ static int __devinit netfront_probe(stru return err; } - netdev = create_netdev(handle, dev); +#ifndef CONFIG_XEN + err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u", + &feature_rx_copy); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading feature-rx-copy"); + return err; + } + if (!feature_rx_copy) { + xenbus_dev_fatal(dev, 0, "need a copy-capable backend"); + return -EINVAL; + } +#else + feature_rx_copy = 0; +#endif + + netdev = create_netdev(handle, feature_rx_copy, dev); if (IS_ERR(netdev)) { err = PTR_ERR(netdev); xenbus_dev_fatal(dev, err, "creating netdev"); @@ -326,6 +358,13 @@ again: goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u", + info->copying_receiver); + if (err) { + message = "writing request-rx-copy"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1); if (err) { message = "writing 
feature-rx-notify"; @@ -338,11 +377,13 @@ again: goto abort_transaction; } +#ifdef HAVE_GSO err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1); if (err) { message = "writing feature-gso-tcpv4"; goto abort_transaction; } +#endif err = xenbus_transaction_end(xbt, 0); if (err) { @@ -415,7 +456,8 @@ static int setup_device(struct xenbus_de memcpy(netdev->dev_addr, info->mac, ETH_ALEN); err = bind_evtchn_to_irqhandler(info->evtchn, netif_int, - SA_SAMPLE_RANDOM, netdev->name, netdev); + SA_SAMPLE_RANDOM, netdev->name, + netdev); if (err < 0) goto fail; info->irq = err; @@ -494,11 +536,14 @@ static int network_open(struct net_devic memset(&np->stats, 0, sizeof(np->stats)); - network_alloc_rx_buffers(dev); - np->rx.sring->rsp_event = np->rx.rsp_cons + 1; - - if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) - netif_rx_schedule(dev); + spin_lock(&np->rx_lock); + if (netif_carrier_ok(dev)) { + network_alloc_rx_buffers(dev); + np->rx.sring->rsp_event = np->rx.rsp_cons + 1; + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + netif_rx_schedule(dev); + } + spin_unlock(&np->rx_lock); netif_start_queue(dev); @@ -527,8 +572,7 @@ static void network_tx_buf_gc(struct net struct netfront_info *np = netdev_priv(dev); struct sk_buff *skb; - if (unlikely(!netif_carrier_ok(dev))) - return; + BUG_ON(!netif_carrier_ok(dev)); do { prod = np->tx.sring->rsp_prod; @@ -597,6 +641,8 @@ static void network_alloc_rx_buffers(str grant_ref_t ref; unsigned long pfn; void *vaddr; + int nr_flips; + netif_rx_request_t *req; if (unlikely(!netif_carrier_ok(dev))) return; @@ -652,7 +698,7 @@ no_skb: np->rx_target = np->rx_max_target; refill: - for (i = 0; ; i++) { + for (nr_flips = i = 0; ; i++) { if ((skb = __skb_dequeue(&np->rx_batch)) == NULL) break; @@ -663,7 +709,6 @@ no_skb: BUG_ON(np->rx_skbs[id]); np->rx_skbs[id] = skb; - RING_GET_REQUEST(&np->rx, req_prod + i)->id = id; ref = gnttab_claim_grant_reference(&np->gref_rx_head); BUG_ON((signed short)ref < 0); np->grant_rx_ref[id] = ref; @@ 
-671,49 +716,68 @@ no_skb: pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page); vaddr = page_address(skb_shinfo(skb)->frags[0].page); - gnttab_grant_foreign_transfer_ref(ref, - np->xbdev->otherend_id, pfn); - RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref; - np->rx_pfn_array[i] = pfn_to_mfn(pfn); + req = RING_GET_REQUEST(&np->rx, req_prod + i); + if (!np->copying_receiver) { + gnttab_grant_foreign_transfer_ref(ref, + np->xbdev->otherend_id, + pfn); + np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remove this page before passing + * back to Xen. */ + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + MULTI_update_va_mapping(np->rx_mcl+i, + (unsigned long)vaddr, + __pte(0), 0); + } + nr_flips++; + } else { + gnttab_grant_foreign_access_ref(ref, + np->xbdev->otherend_id, + pfn, + 0); + } + + req->id = id; + req->gref = ref; + } + + if ( nr_flips != 0 ) { + /* Tell the ballon driver what is going on. */ + balloon_update_driver_allowance(i); + + set_xen_guest_handle(reservation.extent_start, + np->rx_pfn_array); + reservation.nr_extents = nr_flips; + reservation.extent_order = 0; + reservation.address_bits = 0; + reservation.domid = DOMID_SELF; if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* Remove this page before passing back to Xen. */ - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); - MULTI_update_va_mapping(np->rx_mcl+i, - (unsigned long)vaddr, - __pte(0), 0); + /* After all PTEs have been zapped, flush the TLB. */ + np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = + UVMF_TLB_FLUSH|UVMF_ALL; + + /* Give away a batch of pages. */ + np->rx_mcl[i].op = __HYPERVISOR_memory_op; + np->rx_mcl[i].args[0] = XENMEM_decrease_reservation; + np->rx_mcl[i].args[1] = (unsigned long)&reservation; + + /* Zap PTEs and give away pages in one big + * multicall. */ + (void)HYPERVISOR_multicall(np->rx_mcl, i+1); + + /* Check return status of HYPERVISOR_memory_op(). 
*/ + if (unlikely(np->rx_mcl[i].result != i)) + panic("Unable to reduce memory reservation\n"); + } else { + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation) != i) + panic("Unable to reduce memory reservation\n"); } - } - - /* Tell the ballon driver what is going on. */ - balloon_update_driver_allowance(i); - - set_xen_guest_handle(reservation.extent_start, np->rx_pfn_array); - reservation.nr_extents = i; - reservation.extent_order = 0; - reservation.address_bits = 0; - reservation.domid = DOMID_SELF; - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* After all PTEs have been zapped, flush the TLB. */ - np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = - UVMF_TLB_FLUSH|UVMF_ALL; - - /* Give away a batch of pages. */ - np->rx_mcl[i].op = __HYPERVISOR_memory_op; - np->rx_mcl[i].args[0] = XENMEM_decrease_reservation; - np->rx_mcl[i].args[1] = (unsigned long)&reservation; - - /* Zap PTEs and give away pages in one big multicall. */ - (void)HYPERVISOR_multicall(np->rx_mcl, i+1); - - /* Check return status of HYPERVISOR_memory_op(). */ - if (unlikely(np->rx_mcl[i].result != i)) - panic("Unable to reduce memory reservation\n"); - } else - if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation) != i) - panic("Unable to reduce memory reservation\n"); + } else { + wmb(); + } /* Above is a suitable barrier to ensure backend will see requests. */ np->rx.req_prod_pvt = req_prod + i; @@ -840,9 +904,12 @@ static int network_start_xmit(struct sk_ if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; +#ifdef CONFIG_XEN if (skb->proto_data_valid) /* remote but checksummed? 
*/ tx->flags |= NETTXF_data_validated; - +#endif + +#ifdef HAVE_GSO if (skb_shinfo(skb)->gso_size) { struct netif_extra_info *gso = (struct netif_extra_info *) RING_GET_REQUEST(&np->tx, ++i); @@ -861,6 +928,7 @@ static int network_start_xmit(struct sk_ gso->flags = 0; extra = gso; } +#endif np->tx.req_prod_pvt = i + 1; @@ -896,12 +964,15 @@ static irqreturn_t netif_int(int irq, vo unsigned long flags; spin_lock_irqsave(&np->tx_lock, flags); - network_tx_buf_gc(dev); + + if (likely(netif_carrier_ok(dev))) { + network_tx_buf_gc(dev); + /* Under tx_lock: protects access to rx shared-ring indexes. */ + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + netif_rx_schedule(dev); + } + spin_unlock_irqrestore(&np->tx_lock, flags); - - if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx) && - likely(netif_running(dev))) - netif_rx_schedule(dev); return IRQ_HANDLED; } @@ -947,8 +1018,10 @@ int xennet_get_extras(struct netfront_in WPRINTK("Invalid extra type: %d\n", extra->type); err = -EINVAL; - } else - memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); + } else { + memcpy(&extras[extra->type - 1], extra, + sizeof(*extra)); + } skb = xennet_get_rx_skb(np, cons); ref = xennet_get_rx_ref(np, cons); @@ -961,10 +1034,12 @@ int xennet_get_extras(struct netfront_in static int xennet_get_responses(struct netfront_info *np, struct netfront_rx_info *rinfo, RING_IDX rp, - struct sk_buff_head *list, int count) -{ - struct mmu_update *mmu = np->rx_mmu + count; - struct multicall_entry *mcl = np->rx_mcl + count; + struct sk_buff_head *list, + int *pages_flipped_p) +{ + int pages_flipped = *pages_flipped_p; + struct mmu_update *mmu; + struct multicall_entry *mcl; struct netif_rx_response *rx = &rinfo->rx; struct netif_extra_info *extras = rinfo->extras; RING_IDX cons = np->rx.rsp_cons; @@ -973,6 +1048,7 @@ static int xennet_get_responses(struct n int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD); int frags = 1; int err = 0; + unsigned long ret; if (rx->flags & NETRXF_extra_info) { err = 
xennet_get_extras(np, extras, rp); @@ -988,6 +1064,7 @@ static int xennet_get_responses(struct n WPRINTK("rx->offset: %x, size: %u\n", rx->offset, rx->status); err = -EINVAL; + goto next; } /* @@ -1001,35 +1078,47 @@ static int xennet_get_responses(struct n goto next; } - /* Memory pressure, insufficient buffer headroom, ... */ - if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) { - if (net_ratelimit()) - WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n", - rx->id, rx->status); - xennet_move_rx_slot(np, skb, ref); - err = -ENOMEM; - goto next; + if (!np->copying_receiver) { + /* Memory pressure, insufficient buffer + * headroom, ... */ + if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) { + if (net_ratelimit()) + WPRINTK("Unfulfilled rx req " + "(id=%d, st=%d).\n", + rx->id, rx->status); + xennet_move_rx_slot(np, skb, ref); + err = -ENOMEM; + goto next; + } + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remap the page. */ + struct page *page = + skb_shinfo(skb)->frags[0].page; + unsigned long pfn = page_to_pfn(page); + void *vaddr = page_address(page); + + mcl = np->rx_mcl + pages_flipped; + mmu = np->rx_mmu + pages_flipped; + + MULTI_update_va_mapping(mcl, + (unsigned long)vaddr, + pfn_pte_ma(mfn, + PAGE_KERNEL), + 0); + mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE; + mmu->val = pfn; + + set_phys_to_machine(pfn, mfn); + } + pages_flipped++; + } else { + ret = gnttab_end_foreign_access_ref(ref, 0); + BUG_ON(!ret); } gnttab_release_grant_reference(&np->gref_rx_head, ref); - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* Remap the page. 
*/ - struct page *page = skb_shinfo(skb)->frags[0].page; - unsigned long pfn = page_to_pfn(page); - void *vaddr = page_address(page); - - MULTI_update_va_mapping(mcl, (unsigned long)vaddr, - pfn_pte_ma(mfn, PAGE_KERNEL), - 0); - mcl++; - mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) - | MMU_MACHPHYS_UPDATE; - mmu->val = pfn; - mmu++; - - set_phys_to_machine(pfn, mfn); - } __skb_queue_tail(list, skb); @@ -1056,6 +1145,8 @@ next: err = -E2BIG; } + *pages_flipped_p = pages_flipped; + return err; } @@ -1090,7 +1181,8 @@ static RING_IDX xennet_fill_frags(struct return cons; } -static int xennet_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso) +static int xennet_set_skb_gso(struct sk_buff *skb, + struct netif_extra_info *gso) { if (!gso->u.gso.size) { if (net_ratelimit()) @@ -1105,6 +1197,7 @@ static int xennet_set_skb_gso(struct sk_ return -EINVAL; } +#ifdef HAVE_GSO skb_shinfo(skb)->gso_size = gso->u.gso.size; skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; @@ -1113,6 +1206,11 @@ static int xennet_set_skb_gso(struct sk_ skb_shinfo(skb)->gso_segs = 0; return 0; +#else + if (net_ratelimit()) + WPRINTK("GSO unsupported by this kernel.\n"); + return -EINVAL; +#endif } static int netif_poll(struct net_device *dev, int *pbudget) @@ -1130,7 +1228,7 @@ static int netif_poll(struct net_device struct sk_buff_head tmpq; unsigned long flags; unsigned int len; - int pages_done; + int pages_flipped = 0; int err; spin_lock(&np->rx_lock); @@ -1149,14 +1247,14 @@ static int netif_poll(struct net_device rp = np->rx.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ - for (i = np->rx.rsp_cons, work_done = 0, pages_done = 0; + for (i = np->rx.rsp_cons, work_done = 0; (i != rp) && (work_done < budget); np->rx.rsp_cons = ++i, work_done++) { memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); memset(extras, 0, sizeof(extras)); - err = xennet_get_responses(np, &rinfo, rp, &tmpq, pages_done); - pages_done += skb_queue_len(&tmpq); + err = xennet_get_responses(np, &rinfo, rp, &tmpq, + &pages_flipped); if (unlikely(err)) { err: @@ -1200,26 +1298,26 @@ err: i = xennet_fill_frags(np, skb, &tmpq); - /* - * Truesize must approximates the size of true data plus - * any supervisor overheads. Adding hypervisor overheads - * has been shown to significantly reduce achievable - * bandwidth with the default receive buffer size. It is - * therefore not wise to account for it here. - * - * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to - * RX_COPY_THRESHOLD + the supervisor overheads. Here, we - * add the size of the data pulled in xennet_fill_frags(). - * - * We also adjust for any unused space in the main data - * area by subtracting (RX_COPY_THRESHOLD - len). This is - * especially important with drivers which split incoming - * packets into header and data, using only 66 bytes of - * the main data area (see the e1000 driver for example.) - * On such systems, without this last adjustement, our - * achievable receive throughout using the standard receive - * buffer size was cut by 25%(!!!). - */ + /* + * Truesize must approximates the size of true data plus + * any supervisor overheads. Adding hypervisor overheads + * has been shown to significantly reduce achievable + * bandwidth with the default receive buffer size. It is + * therefore not wise to account for it here. + * + * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to + * RX_COPY_THRESHOLD + the supervisor overheads. Here, we + * add the size of the data pulled in xennet_fill_frags(). 
+ * + * We also adjust for any unused space in the main data + * area by subtracting (RX_COPY_THRESHOLD - len). This is + * especially important with drivers which split incoming + * packets into header and data, using only 66 bytes of + * the main data area (see the e1000 driver for example.) + * On such systems, without this last adjustement, our + * achievable receive throughout using the standard receive + * buffer size was cut by 25%(!!!). + */ skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len); skb->len += skb->data_len; @@ -1227,33 +1325,35 @@ err: * Old backends do not assert data_validated but we * can infer it from csum_blank so test both flags. */ - if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) { + if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->proto_data_valid = 1; - } else { + else skb->ip_summed = CHECKSUM_NONE; - skb->proto_data_valid = 0; - } +#ifdef CONFIG_XEN + skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE); skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank); - +#endif np->stats.rx_packets++; np->stats.rx_bytes += skb->len; __skb_queue_tail(&rxq, skb); } - /* Some pages are no longer absent... */ - balloon_update_driver_allowance(-pages_done); - - /* Do all the remapping work, and M2P updates, in one big hypercall. */ - if (likely(pages_done)) { - mcl = np->rx_mcl + pages_done; - mcl->op = __HYPERVISOR_mmu_update; - mcl->args[0] = (unsigned long)np->rx_mmu; - mcl->args[1] = pages_done; - mcl->args[2] = 0; - mcl->args[3] = DOMID_SELF; - (void)HYPERVISOR_multicall(np->rx_mcl, pages_done + 1); + if (pages_flipped) { + /* Some pages are no longer absent... */ + balloon_update_driver_allowance(-pages_flipped); + + /* Do all the remapping work and M2P updates. 
*/ + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + mcl = np->rx_mcl + pages_flipped; + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)np->rx_mmu; + mcl->args[1] = pages_flipped; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + (void)HYPERVISOR_multicall(np->rx_mcl, + pages_flipped + 1); + } } while ((skb = __skb_dequeue(&errq))) @@ -1304,97 +1404,12 @@ err: return more_to_do; } - -static int network_close(struct net_device *dev) -{ - struct netfront_info *np = netdev_priv(dev); - netif_stop_queue(np->netdev); - return 0; -} - - -static struct net_device_stats *network_get_stats(struct net_device *dev) -{ - struct netfront_info *np = netdev_priv(dev); - return &np->stats; -} - -static int xennet_change_mtu(struct net_device *dev, int mtu) -{ - int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; - - if (mtu > max) - return -EINVAL; - dev->mtu = mtu; - return 0; -} - -static int xennet_set_sg(struct net_device *dev, u32 data) -{ - if (data) { - struct netfront_info *np = netdev_priv(dev); - int val; - - if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg", - "%d", &val) < 0) - val = 0; - if (!val) - return -ENOSYS; - } else if (dev->mtu > ETH_DATA_LEN) - dev->mtu = ETH_DATA_LEN; - - return ethtool_op_set_sg(dev, data); -} - -static int xennet_set_tso(struct net_device *dev, u32 data) -{ - if (data) { - struct netfront_info *np = netdev_priv(dev); - int val; - - if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, - "feature-gso-tcpv4", "%d", &val) < 0) - val = 0; - if (!val) - return -ENOSYS; - } - - return ethtool_op_set_tso(dev, data); -} - -static void xennet_set_features(struct net_device *dev) -{ - /* Turn off all GSO bits except ROBUST. 
*/ - dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1; - dev->features |= NETIF_F_GSO_ROBUST; - xennet_set_sg(dev, 0); - - if (!xennet_set_sg(dev, 1)) - xennet_set_tso(dev, 1); -} - -static void network_connect(struct net_device *dev) -{ - struct netfront_info *np = netdev_priv(dev); - int i, requeue_idx; +static void netif_release_tx_bufs(struct netfront_info *np) +{ struct sk_buff *skb; - grant_ref_t ref; - - xennet_set_features(dev); - - spin_lock_irq(&np->tx_lock); - spin_lock(&np->rx_lock); - - /* - * Recovery procedure: - * NB. Freelist index entries are always going to be less than - * PAGE_OFFSET, whereas pointers to skbs will always be equal or - * greater than PAGE_OFFSET: we use this property to distinguish - * them. - */ - - /* Step 1: Discard all pending TX packet fragments. */ - for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) { + int i; + + for (i = 1; i <= NET_TX_RING_SIZE; i++) { if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET) continue; @@ -1407,6 +1422,191 @@ static void network_connect(struct net_d add_id_to_freelist(np->tx_skbs, i); dev_kfree_skb_irq(skb); } +} + +static void netif_release_rx_bufs(struct netfront_info *np) +{ + struct mmu_update *mmu = np->rx_mmu; + struct multicall_entry *mcl = np->rx_mcl; + struct sk_buff_head free_list; + struct sk_buff *skb; + unsigned long mfn; + int xfer = 0, noxfer = 0, unused = 0; + int id, ref; + + if (np->copying_receiver) { + printk("%s: fix me for copying receiver.\n", __FUNCTION__); + return; + } + + skb_queue_head_init(&free_list); + + spin_lock(&np->rx_lock); + + for (id = 0; id < NET_RX_RING_SIZE; id++) { + if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) { + unused++; + continue; + } + + skb = np->rx_skbs[id]; + mfn = gnttab_end_foreign_transfer_ref(ref); + gnttab_release_grant_reference(&np->gref_rx_head, ref); + np->grant_rx_ref[id] = GRANT_INVALID_REF; + add_id_to_freelist(np->rx_skbs, id); + + if (0 == mfn) { + struct page *page = skb_shinfo(skb)->frags[0].page; + 
balloon_release_driver_page(page); + skb_shinfo(skb)->nr_frags = 0; + dev_kfree_skb(skb); + noxfer++; + continue; + } + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remap the page. */ + struct page *page = skb_shinfo(skb)->frags[0].page; + unsigned long pfn = page_to_pfn(page); + void *vaddr = page_address(page); + + MULTI_update_va_mapping(mcl, (unsigned long)vaddr, + pfn_pte_ma(mfn, PAGE_KERNEL), + 0); + mcl++; + mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE; + mmu->val = pfn; + mmu++; + + set_phys_to_machine(pfn, mfn); + } + __skb_queue_tail(&free_list, skb); + xfer++; + } + + printk("%s: %d xfer, %d noxfer, %d unused\n", + __FUNCTION__, xfer, noxfer, unused); + + if (xfer) { + /* Some pages are no longer absent... */ + balloon_update_driver_allowance(-xfer); + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Do all the remapping work and M2P updates. */ + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)np->rx_mmu; + mcl->args[1] = mmu - np->rx_mmu; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + mcl++; + HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl); + } + } + + while ((skb = __skb_dequeue(&free_list)) != NULL) + dev_kfree_skb(skb); + + spin_unlock(&np->rx_lock); +} + +static int network_close(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + netif_stop_queue(np->netdev); + return 0; +} + + +static struct net_device_stats *network_get_stats(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + return &np->stats; +} + +static int xennet_change_mtu(struct net_device *dev, int mtu) +{ + int max = xennet_can_sg(dev) ? 
65535 - ETH_HLEN : ETH_DATA_LEN; + + if (mtu > max) + return -EINVAL; + dev->mtu = mtu; + return 0; +} + +static int xennet_set_sg(struct net_device *dev, u32 data) +{ + if (data) { + struct netfront_info *np = netdev_priv(dev); + int val; + + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg", + "%d", &val) < 0) + val = 0; + if (!val) + return -ENOSYS; + } else if (dev->mtu > ETH_DATA_LEN) + dev->mtu = ETH_DATA_LEN; + + return ethtool_op_set_sg(dev, data); +} + +static int xennet_set_tso(struct net_device *dev, u32 data) +{ +#ifdef HAVE_GSO + if (data) { + struct netfront_info *np = netdev_priv(dev); + int val; + + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-gso-tcpv4", "%d", &val) < 0) + val = 0; + if (!val) + return -ENOSYS; + } + + return ethtool_op_set_tso(dev, data); +#else + return -ENOSYS; +#endif +} + +static void xennet_set_features(struct net_device *dev) +{ + dev_disable_gso_features(dev); + xennet_set_sg(dev, 0); + + /* We need checksum offload to enable scatter/gather and TSO. */ + if (!(dev->features & NETIF_F_ALL_CSUM)) + return; + + if (!xennet_set_sg(dev, 1)) + xennet_set_tso(dev, 1); +} + +static void network_connect(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + int i, requeue_idx; + struct sk_buff *skb; + grant_ref_t ref; + netif_rx_request_t *req; + + xennet_set_features(dev); + + spin_lock_irq(&np->tx_lock); + spin_lock(&np->rx_lock); + + /* + * Recovery procedure: + * NB. Freelist index entries are always going to be less than + * PAGE_OFFSET, whereas pointers to skbs will always be equal or + * greater than PAGE_OFFSET: we use this property to distinguish + * them. + */ + + /* Step 1: Discard all pending TX packet fragments. */ + netif_release_tx_bufs(np); /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. 
*/ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { @@ -1415,13 +1615,20 @@ static void network_connect(struct net_d skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i); ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); - - gnttab_grant_foreign_transfer_ref( - ref, np->xbdev->otherend_id, - page_to_pfn(skb_shinfo(skb)->frags->page)); - - RING_GET_REQUEST(&np->rx, requeue_idx)->gref = ref; - RING_GET_REQUEST(&np->rx, requeue_idx)->id = requeue_idx; + req = RING_GET_REQUEST(&np->rx, requeue_idx); + + if (!np->copying_receiver) { + gnttab_grant_foreign_transfer_ref( + ref, np->xbdev->otherend_id, + page_to_pfn(skb_shinfo(skb)->frags->page)); + } else { + gnttab_grant_foreign_access_ref( + ref, np->xbdev->otherend_id, + page_to_pfn(skb_shinfo(skb)->frags->page), + 0); + } + req->gref = ref; + req->id = requeue_idx; requeue_idx++; } @@ -1446,6 +1653,8 @@ static void netif_uninit(struct net_devi static void netif_uninit(struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); + netif_release_tx_bufs(np); + netif_release_rx_bufs(np); gnttab_free_grant_references(np->gref_tx_head); gnttab_free_grant_references(np->gref_rx_head); } @@ -1604,13 +1813,8 @@ static void network_set_multicast_list(s { } -/** Create a network device. 
- * @param handle device handle - * @param val return parameter for created device - * @return 0 on success, error code otherwise - */ -static struct net_device * __devinit create_netdev(int handle, - struct xenbus_device *dev) +static struct net_device * __devinit +create_netdev(int handle, int copying_receiver, struct xenbus_device *dev) { int i, err = 0; struct net_device *netdev = NULL; @@ -1623,9 +1827,10 @@ static struct net_device * __devinit cre return ERR_PTR(-ENOMEM); } - np = netdev_priv(netdev); - np->handle = handle; - np->xbdev = dev; + np = netdev_priv(netdev); + np->handle = handle; + np->xbdev = dev; + np->copying_receiver = copying_receiver; netif_carrier_off(netdev); diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Sun Aug 20 11:08:45 2006 -0400 @@ -181,6 +181,7 @@ static void pciback_frontend_changed(str xenbus_switch_state(xdev, XenbusStateClosing); break; + case XenbusStateUnknown: case XenbusStateClosed: dev_dbg(&xdev->dev, "frontend is gone! 
unregister device\n"); device_unregister(&xdev->dev); diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c Sun Aug 20 11:08:45 2006 -0400 @@ -225,6 +225,7 @@ static void pcifront_backend_changed(str pcifront_try_disconnect(pdev); break; + case XenbusStateUnknown: case XenbusStateClosed: dev_warn(&xdev->dev, "backend went away!\n"); pcifront_try_disconnect(pdev); diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Sun Aug 20 11:08:45 2006 -0400 @@ -108,13 +108,15 @@ static int privcmd_ioctl(struct inode *i } break; -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) case IOCTL_PRIVCMD_MMAP: { #define PRIVCMD_MMAP_SZ 32 privcmd_mmap_t mmapcmd; privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ]; privcmd_mmap_entry_t __user *p; int i, rc; + + if (!is_initial_xendomain()) + return -EPERM; if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) return -EFAULT; @@ -162,8 +164,11 @@ static int privcmd_ioctl(struct inode *i privcmd_mmapbatch_t m; struct vm_area_struct *vma = NULL; xen_pfn_t __user *p; - unsigned long addr, mfn; + unsigned long addr, mfn; int i; + + if (!is_initial_xendomain()) + return -EPERM; if (copy_from_user(&m, udata, sizeof(m))) { ret = -EFAULT; @@ -215,7 +220,6 @@ static int privcmd_ioctl(struct inode *i break; } break; -#endif default: ret = -EINVAL; diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Sun Aug 20 11:08:45 2006 -0400 @@ -34,7 +34,6 @@ struct backend_info /* watch front end for changes */ struct xenbus_watch backend_watch; - 
enum xenbus_state frontend_state; }; static void maybe_connect(struct backend_info *be); @@ -143,8 +142,6 @@ static void frontend_changed(struct xenb struct backend_info *be = dev->dev.driver_data; int err; - be->frontend_state = frontend_state; - switch (frontend_state) { case XenbusStateInitialising: case XenbusStateInitialised: @@ -162,13 +159,12 @@ static void frontend_changed(struct xenb be->instance = -1; break; + case XenbusStateUnknown: case XenbusStateClosed: device_unregister(&be->dev->dev); tpmback_remove(dev); break; - case XenbusStateUnknown: - case XenbusStateInitWait: default: xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Sun Aug 20 11:08:45 2006 -0400 @@ -274,7 +274,7 @@ enum xenbus_state xenbus_read_driver_sta enum xenbus_state result; int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); if (err) - result = XenbusStateClosed; + result = XenbusStateUnknown; return result; } diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Sun Aug 20 11:08:45 2006 -0400 @@ -346,7 +346,7 @@ static struct file_operations xenbus_dev .poll = xenbus_dev_poll, }; -static int __init +int __init xenbus_dev_init(void) { xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400); @@ -355,5 +355,3 @@ xenbus_dev_init(void) return 0; } - -__initcall(xenbus_dev_init); diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Sun Aug 20 
11:08:45 2006 -0400 @@ -45,12 +45,14 @@ #include <asm/io.h> #include <asm/page.h> +#include <asm/maddr.h> #include <asm/pgtable.h> #include <asm/hypervisor.h> #include <xen/xenbus.h> #include <xen/xen_proc.h> #include <xen/evtchn.h> #include <xen/features.h> +#include <xen/hvm.h> #include "xenbus_comms.h" @@ -63,6 +65,14 @@ static struct notifier_block *xenstore_c static struct notifier_block *xenstore_chain; static void wait_for_devices(struct xenbus_driver *xendrv); + +static int xenbus_probe_frontend(const char *type, const char *name); +static int xenbus_uevent_backend(struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size); +static int xenbus_probe_backend(const char *type, const char *domid); + +static int xenbus_dev_probe(struct device *_dev); +static int xenbus_dev_remove(struct device *_dev); /* If something in array of ids matches this device, return it. */ static const struct xenbus_device_id * @@ -172,15 +182,16 @@ static int read_frontend_details(struct /* Bus type for frontend drivers. 
*/ -static int xenbus_probe_frontend(const char *type, const char *name); static struct xen_bus_type xenbus_frontend = { .root = "device", .levels = 2, /* device/type/<id> */ .get_bus_id = frontend_bus_id, .probe = xenbus_probe_frontend, .bus = { - .name = "xen", - .match = xenbus_match, + .name = "xen", + .match = xenbus_match, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, }, .dev = { .bus_id = "xen", @@ -225,18 +236,17 @@ static int backend_bus_id(char bus_id[BU return 0; } -static int xenbus_uevent_backend(struct device *dev, char **envp, - int num_envp, char *buffer, int buffer_size); -static int xenbus_probe_backend(const char *type, const char *domid); static struct xen_bus_type xenbus_backend = { .root = "backend", .levels = 3, /* backend/type/<frontend>/<id> */ .get_bus_id = backend_bus_id, .probe = xenbus_probe_backend, .bus = { - .name = "xen-backend", - .match = xenbus_match, - .uevent = xenbus_uevent_backend, + .name = "xen-backend", + .match = xenbus_match, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, + .uevent = xenbus_uevent_backend, }, .dev = { .bus_id = "xen-backend", @@ -403,8 +413,6 @@ static int xenbus_register_driver_common drv->driver.name = drv->name; drv->driver.bus = &bus->bus; drv->driver.owner = drv->owner; - drv->driver.probe = xenbus_dev_probe; - drv->driver.remove = xenbus_dev_remove; mutex_lock(&xenwatch_mutex); ret = driver_register(&drv->driver); @@ -844,7 +852,7 @@ static int resume_dev(struct device *dev printk(KERN_WARNING "xenbus: resume %s failed: %i\n", dev->bus_id, err); - return err; + return err; } } @@ -856,7 +864,7 @@ static int resume_dev(struct device *dev return err; } - return 0; + return 0; } void xenbus_suspend(void) @@ -962,7 +970,7 @@ static int xsd_port_read(char *page, cha static int __init xenbus_probe_init(void) { - int err = 0, dom0; + int err = 0; unsigned long page = 0; DPRINTK(""); @@ -977,9 +985,7 @@ static int __init xenbus_probe_init(void /* * Domain0 doesn't have a 
store_evtchn or store_mfn yet. */ - dom0 = (xen_start_info->store_evtchn == 0); - - if (dom0) { + if (is_initial_xendomain()) { struct evtchn_alloc_unbound alloc_unbound; /* Allocate page. */ @@ -1017,13 +1023,23 @@ static int __init xenbus_probe_init(void if (xsd_port_intf) xsd_port_intf->read_proc = xsd_port_read; #endif + xen_store_interface = mfn_to_virt(xen_store_mfn); } else { xenstored_ready = 1; +#ifdef CONFIG_XEN xen_store_evtchn = xen_start_info->store_evtchn; xen_store_mfn = xen_start_info->store_mfn; - } - - xen_store_interface = mfn_to_virt(xen_store_mfn); + xen_store_interface = mfn_to_virt(xen_store_mfn); +#else + xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN); + xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN); + xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, + PAGE_SIZE); +#endif + } + + + xenbus_dev_init(); /* Initialize the interface to xenstore. */ err = xs_init(); @@ -1037,7 +1053,7 @@ static int __init xenbus_probe_init(void device_register(&xenbus_frontend.dev); device_register(&xenbus_backend.dev); - if (!dom0) + if (!is_initial_xendomain()) xenbus_probe(NULL); return 0; @@ -1056,6 +1072,8 @@ static int __init xenbus_probe_init(void } postcore_initcall(xenbus_probe_init); + +MODULE_LICENSE("Dual BSD/GPL"); static int is_disconnected_device(struct device *dev, void *data) @@ -1140,6 +1158,7 @@ static void wait_for_devices(struct xenb print_device_status); } +#ifndef MODULE static int __init boot_wait_for_devices(void) { ready_to_wait_for_devices = 1; @@ -1148,3 +1167,4 @@ static int __init boot_wait_for_devices( } late_initcall(boot_wait_for_devices); +#endif diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Sun Aug 20 11:08:45 2006 -0400 @@ -665,7 +665,17 @@ EXPORT_SYMBOL_GPL(unregister_xenbus_watc void xs_suspend(void) { + 
struct xenbus_watch *watch; + char token[sizeof(watch) * 2 + 1]; + down_write(&xs_state.suspend_mutex); + + /* No need for watches_lock: the suspend_mutex is sufficient. */ + list_for_each_entry(watch, &watches, list) { + sprintf(token, "%lX", (long)watch); + xs_unwatch(watch->node, token); + } + mutex_lock(&xs_state.request_mutex); } diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Sun Aug 20 11:08:45 2006 -0400 @@ -58,7 +58,11 @@ extern shared_info_t *HYPERVISOR_shared_ /* arch/xen/i386/kernel/setup.c */ extern start_info_t *xen_start_info; +#ifdef CONFIG_XEN_PRIVILEGED_GUEST #define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN) +#else +#define is_initial_xendomain() 0 +#endif /* arch/xen/kernel/evtchn.c */ /* Force a proper event-channel callback from Xen. */ @@ -199,6 +203,16 @@ MULTI_update_va_mapping( } static inline void +MULTI_grant_table_op(multicall_entry_t *mcl, unsigned int cmd, + void *uop, unsigned int count) +{ + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = cmd; + mcl->args[1] = (unsigned long)uop; + mcl->args[2] = count; +} + +static inline void MULTI_update_va_mapping_otherdomain( multicall_entry_t *mcl, unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/include/xen/balloon.h --- a/linux-2.6-xen-sparse/include/xen/balloon.h Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/include/xen/balloon.h Sun Aug 20 11:08:45 2006 -0400 @@ -52,6 +52,8 @@ balloon_dealloc_empty_page_range( balloon_dealloc_empty_page_range( struct page *page, unsigned long nr_pages); +void balloon_release_driver_page(struct page *page); + /* * Prevent the balloon driver from changing the memory reservation during * a driver critical region. 
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/include/xen/hvm.h --- a/linux-2.6-xen-sparse/include/xen/hvm.h Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/include/xen/hvm.h Sun Aug 20 11:08:45 2006 -0400 @@ -8,10 +8,17 @@ static inline unsigned long hvm_get_para static inline unsigned long hvm_get_parameter(int idx) { struct xen_hvm_param xhv; + int r; xhv.domid = DOMID_SELF; xhv.index = idx; - return HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); + r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); + if (r < 0) { + printk(KERN_ERR "cannot get hvm parameter %d: %d.\n", + idx, r); + return 0; + } + return xhv.value; } #endif /* XEN_HVM_H__ */ diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/include/xen/xenbus.h --- a/linux-2.6-xen-sparse/include/xen/xenbus.h Sun Aug 20 11:07:52 2006 -0400 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Sun Aug 20 11:08:45 2006 -0400 @@ -274,7 +274,7 @@ int xenbus_free_evtchn(struct xenbus_dev /** * Return the state of the driver rooted at the given store path, or - * XenbusStateClosed if no state can be read. + * XenbusStateUnknown if no state can be read. 
*/ enum xenbus_state xenbus_read_driver_state(const char *path); @@ -295,5 +295,6 @@ void xenbus_dev_fatal(struct xenbus_devi void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...); +int __init xenbus_dev_init(void); #endif /* _XEN_XENBUS_H */ diff -r 96d6f9cfed6e -r 4cffec02b478 tools/blktap/lib/Makefile --- a/tools/blktap/lib/Makefile Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/blktap/lib/Makefile Sun Aug 20 11:08:45 2006 -0400 @@ -61,7 +61,7 @@ libblktap.a: $(OBJS) -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS) ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR) ln -sf libblktap.so.$(MAJOR) libblktap.so - ar rc $@ libblktap.so + $(AR) rc $@ libblktap.so .PHONY: TAGS all build clean install libblktap diff -r 96d6f9cfed6e -r 4cffec02b478 tools/examples/xmexample.hvm --- a/tools/examples/xmexample.hvm Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/examples/xmexample.hvm Sun Aug 20 11:08:45 2006 -0400 @@ -26,6 +26,10 @@ builder='hvm' # memory errors. The domain needs enough memory to boot kernel # and modules. Allocating less than 32MBs is not recommended. memory = 128 + +# Shadow pagetable memory for the domain, in MB. +# Should be at least 2KB per MB of domain memory, plus a few MB per vcpu. +shadow_memory = 8 # A name for your domain. All domains must have different names. name = "ExampleHVMDomain" diff -r 96d6f9cfed6e -r 4cffec02b478 tools/firmware/rombios/apmbios.S --- a/tools/firmware/rombios/apmbios.S Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/firmware/rombios/apmbios.S Sun Aug 20 11:08:45 2006 -0400 @@ -225,7 +225,10 @@ APMSYM(05): APMSYM(05): cmp al, #0x05 jne APMSYM(07) + pushf ; XEN + sti ; XEN: OS calls us with ints disabled -- better re-enable here! 
hlt + popf ; XEN jmp APMSYM(ok) ;----------------- diff -r 96d6f9cfed6e -r 4cffec02b478 tools/firmware/rombios/rombios.c --- a/tools/firmware/rombios/rombios.c Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/firmware/rombios/rombios.c Sun Aug 20 11:08:45 2006 -0400 @@ -9459,7 +9459,7 @@ smbios_init: mov cx, #0x001f ; 0x1f bytes to copy mov ax, #0xf000 mov es, ax ; destination segment is 0xf0000 - mov di, smbios_entry_point ; destination offset + mov di, #smbios_entry_point ; destination offset mov ax, #0x9f00 mov ds, ax ; source segment is 0x9f000 mov si, #0x0000 ; source offset is 0 diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/Makefile.target --- a/tools/ioemu/Makefile.target Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/Makefile.target Sun Aug 20 11:08:45 2006 -0400 @@ -359,6 +359,7 @@ VL_OBJS+= usb-uhci.o VL_OBJS+= usb-uhci.o VL_OBJS+= piix4acpi.o VL_OBJS+= xenstore.o +VL_OBJS+= xen_platform.o DEFINES += -DHAS_AUDIO endif ifeq ($(TARGET_BASE_ARCH), ppc) diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/hw/pc.c --- a/tools/ioemu/hw/pc.c Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/hw/pc.c Sun Aug 20 11:08:45 2006 -0400 @@ -823,6 +823,9 @@ static void pc_init1(uint64_t ram_size, } #endif /* !CONFIG_DM */ + if (pci_enabled) + pci_xen_platform_init(pci_bus); + for(i = 0; i < MAX_SERIAL_PORTS; i++) { if (serial_hds[i]) { serial_init(&pic_set_irq_new, isa_pic, diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/hw/piix_pci.c --- a/tools/ioemu/hw/piix_pci.c Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/hw/piix_pci.c Sun Aug 20 11:08:45 2006 -0400 @@ -415,7 +415,7 @@ void pci_bios_init(void) uint8_t elcr[2]; pci_bios_io_addr = 0xc000; - pci_bios_mem_addr = 0xf0000000; + pci_bios_mem_addr = HVM_BELOW_4G_MMIO_START; /* activate IRQ mappings */ elcr[0] = 0x00; diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/acpi-poweroff-support --- a/tools/ioemu/patches/acpi-poweroff-support Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/acpi-poweroff-support Sun 
Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/hw/piix4acpi.c Index: ioemu/hw/piix4acpi.c =================================================================== ---- ioemu.orig/hw/piix4acpi.c 2006-08-06 02:30:29.288761563 +0100 -+++ ioemu/hw/piix4acpi.c 2006-08-06 02:30:42.131331446 +0100 +--- ioemu.orig/hw/piix4acpi.c 2006-08-17 19:50:05.060576667 +0100 ++++ ioemu/hw/piix4acpi.c 2006-08-17 19:50:07.563300039 +0100 @@ -45,6 +45,10 @@ #define GBL_RLS (1 << 2) #define SLP_EN (1 << 13) diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/acpi-support --- a/tools/ioemu/patches/acpi-support Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/acpi-support Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/Makefile.target Index: ioemu/Makefile.target =================================================================== ---- ioemu.orig/Makefile.target 2006-08-09 19:54:26.055548240 +0100 -+++ ioemu/Makefile.target 2006-08-09 21:29:37.834611244 +0100 +--- ioemu.orig/Makefile.target 2006-08-17 19:49:50.228216099 +0100 ++++ ioemu/Makefile.target 2006-08-17 19:50:02.405870095 +0100 @@ -357,6 +357,7 @@ VL_OBJS+= fdc.o mc146818rtc.o serial.o pc.o VL_OBJS+= cirrus_vga.o mixeng.o parallel.o acpi.o piix_pci.o @@ -12,8 +12,8 @@ Index: ioemu/Makefile.target ifeq ($(TARGET_BASE_ARCH), ppc) Index: ioemu/hw/pc.c =================================================================== ---- ioemu.orig/hw/pc.c 2006-08-09 19:54:26.133539447 +0100 -+++ ioemu/hw/pc.c 2006-08-09 21:30:30.188733212 +0100 +--- ioemu.orig/hw/pc.c 2006-08-17 19:49:59.312212039 +0100 ++++ ioemu/hw/pc.c 2006-08-17 19:50:02.406869984 +0100 @@ -874,13 +874,19 @@ cmos_init(ram_size, boot_device, bs_table, timeoffset); @@ -49,7 +49,7 @@ Index: ioemu/hw/piix4acpi.c Index: ioemu/hw/piix4acpi.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/hw/piix4acpi.c 2006-08-09 21:32:04.400129788 +0100 ++++ ioemu/hw/piix4acpi.c 2006-08-17 
19:50:02.407869874 +0100 @@ -0,0 +1,388 @@ +/* + * PIIX4 ACPI controller emulation @@ -441,8 +441,8 @@ Index: ioemu/hw/piix4acpi.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-09 19:54:26.135539222 +0100 -+++ ioemu/vl.c 2006-08-09 21:29:38.067585110 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:49:59.315211708 +0100 ++++ ioemu/vl.c 2006-08-17 19:50:02.410869542 +0100 @@ -156,7 +156,7 @@ #else #define MAX_CPUS 1 @@ -488,9 +488,9 @@ Index: ioemu/vl.c } Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-09 19:54:26.136539109 +0100 -+++ ioemu/vl.h 2006-08-09 21:31:21.772931536 +0100 -@@ -167,6 +167,7 @@ +--- ioemu.orig/vl.h 2006-08-17 19:49:59.316211597 +0100 ++++ ioemu/vl.h 2006-08-17 19:50:02.411869432 +0100 +@@ -168,6 +168,7 @@ extern int kqemu_allowed; extern int win2k_install_hack; extern int usb_enabled; @@ -498,7 +498,7 @@ Index: ioemu/vl.h extern int smp_cpus; /* XXX: make it dynamic */ -@@ -922,6 +923,9 @@ +@@ -923,6 +924,9 @@ void piix4_pm_init(PCIBus *bus, int devfn); void acpi_bios_init(void); @@ -510,8 +510,8 @@ Index: ioemu/vl.h extern QEMUMachine isapc_machine; Index: ioemu/hw/piix_pci.c =================================================================== ---- ioemu.orig/hw/piix_pci.c 2006-08-09 19:54:19.636318228 +0100 -+++ ioemu/hw/piix_pci.c 2006-08-09 19:54:26.152537305 +0100 +--- ioemu.orig/hw/piix_pci.c 2006-08-17 19:38:05.806252180 +0100 ++++ ioemu/hw/piix_pci.c 2006-08-17 19:50:02.411869432 +0100 @@ -241,7 +241,7 @@ static uint32_t pci_bios_io_addr; static uint32_t pci_bios_mem_addr; diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/acpi-timer-support --- a/tools/ioemu/patches/acpi-timer-support Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/acpi-timer-support Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/hw/piix4acpi.c Index: ioemu/hw/piix4acpi.c 
=================================================================== ---- ioemu.orig/hw/piix4acpi.c 2006-08-09 20:00:56.118008198 +0100 -+++ ioemu/hw/piix4acpi.c 2006-08-09 20:04:54.375299065 +0100 +--- ioemu.orig/hw/piix4acpi.c 2006-08-17 19:50:02.407869874 +0100 ++++ ioemu/hw/piix4acpi.c 2006-08-17 19:50:05.060576667 +0100 @@ -24,31 +24,30 @@ */ @@ -184,7 +184,7 @@ Index: ioemu/hw/piix4acpi.c } - - /* PIIX4 acpi pci configuration space, func 3 */ + /* PIIX4 acpi pci configuration space, func 2 */ void pci_piix4_acpi_init(PCIBus *bus, int devfn) @@ -384,5 +383,5 @@ pci_register_io_region((PCIDevice *)d, 4, 0x10, diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/domain-destroy --- a/tools/ioemu/patches/domain-destroy Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/domain-destroy Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/monitor.c Index: ioemu/monitor.c =================================================================== ---- ioemu.orig/monitor.c 2006-08-08 11:27:48.555190337 +0100 -+++ ioemu/monitor.c 2006-08-08 11:27:53.984584612 +0100 +--- ioemu.orig/monitor.c 2006-08-17 19:37:36.489509621 +0100 ++++ ioemu/monitor.c 2006-08-17 19:49:44.491850141 +0100 @@ -308,6 +308,7 @@ static void do_quit(void) @@ -12,8 +12,8 @@ Index: ioemu/monitor.c Index: ioemu/target-i386-dm/helper2.c =================================================================== ---- ioemu.orig/target-i386-dm/helper2.c 2006-08-08 11:27:53.063687351 +0100 -+++ ioemu/target-i386-dm/helper2.c 2006-08-08 11:27:54.011581601 +0100 +--- ioemu.orig/target-i386-dm/helper2.c 2006-08-17 19:49:40.116333768 +0100 ++++ ioemu/target-i386-dm/helper2.c 2006-08-17 19:49:44.491850141 +0100 @@ -488,5 +488,25 @@ xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]); } @@ -42,9 +42,9 @@ Index: ioemu/target-i386-dm/helper2.c +} Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-08 11:27:53.067686905 +0100 -+++ ioemu/vl.h 
2006-08-08 11:27:54.061576023 +0100 -@@ -1189,4 +1189,7 @@ +--- ioemu.orig/vl.h 2006-08-17 19:49:40.120333326 +0100 ++++ ioemu/vl.h 2006-08-17 19:49:44.492850031 +0100 +@@ -1190,4 +1190,7 @@ void kqemu_record_dump(void); extern char domain_name[]; diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/domain-reset --- a/tools/ioemu/patches/domain-reset Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/domain-reset Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/target-i386-dm/helper2.c Index: ioemu/target-i386-dm/helper2.c =================================================================== ---- ioemu.orig/target-i386-dm/helper2.c 2006-08-08 11:27:45.566523765 +0100 -+++ ioemu/target-i386-dm/helper2.c 2006-08-08 11:27:53.063687351 +0100 +--- ioemu.orig/target-i386-dm/helper2.c 2006-08-17 19:37:36.530505066 +0100 ++++ ioemu/target-i386-dm/helper2.c 2006-08-17 19:49:40.116333768 +0100 @@ -127,6 +127,25 @@ /* called from main_cpu_reset */ void cpu_reset(CPUX86State *env) @@ -41,8 +41,8 @@ Index: ioemu/target-i386-dm/helper2.c /* Wait up to 10 msec. 
*/ Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-08 11:27:52.994695048 +0100 -+++ ioemu/vl.c 2006-08-08 11:27:53.066687017 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:49:39.442408257 +0100 ++++ ioemu/vl.c 2006-08-17 19:49:40.119333436 +0100 @@ -4948,7 +4948,7 @@ } QEMUResetEntry; @@ -54,9 +54,9 @@ Index: ioemu/vl.c Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-08 11:27:48.757167803 +0100 -+++ ioemu/vl.h 2006-08-08 11:27:53.067686905 +0100 -@@ -130,6 +130,7 @@ +--- ioemu.orig/vl.h 2006-08-17 19:47:32.680418959 +0100 ++++ ioemu/vl.h 2006-08-17 19:49:40.120333326 +0100 +@@ -131,6 +131,7 @@ void qemu_register_reset(QEMUResetHandler *func, void *opaque); void qemu_system_reset_request(void); @@ -64,7 +64,7 @@ Index: ioemu/vl.h void qemu_system_shutdown_request(void); void qemu_system_powerdown_request(void); #if !defined(TARGET_SPARC) -@@ -139,6 +140,8 @@ +@@ -140,6 +141,8 @@ void qemu_system_powerdown(void); #endif diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/domain-timeoffset --- a/tools/ioemu/patches/domain-timeoffset Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/domain-timeoffset Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/hw/mc146818rtc.c Index: ioemu/hw/mc146818rtc.c =================================================================== ---- ioemu.orig/hw/mc146818rtc.c 2006-08-09 21:32:18.709516404 +0100 -+++ ioemu/hw/mc146818rtc.c 2006-08-09 21:32:24.723838065 +0100 +--- ioemu.orig/hw/mc146818rtc.c 2006-08-17 19:58:03.222720593 +0100 ++++ ioemu/hw/mc146818rtc.c 2006-08-17 19:58:08.528134087 +0100 @@ -178,10 +178,27 @@ } } @@ -46,8 +46,8 @@ Index: ioemu/hw/mc146818rtc.c static void rtc_copy_date(RTCState *s) Index: ioemu/hw/pc.c =================================================================== ---- ioemu.orig/hw/pc.c 2006-08-09 21:32:24.449868968 +0100 -+++ ioemu/hw/pc.c 2006-08-09 
21:32:24.724837952 +0100 +--- ioemu.orig/hw/pc.c 2006-08-17 19:58:08.252164595 +0100 ++++ ioemu/hw/pc.c 2006-08-17 19:58:08.529133976 +0100 @@ -159,7 +159,7 @@ } @@ -117,8 +117,8 @@ Index: ioemu/hw/pc.c QEMUMachine pc_machine = { Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-09 21:32:24.591852952 +0100 -+++ ioemu/vl.c 2006-08-09 21:32:24.727837614 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:58:08.395148788 +0100 ++++ ioemu/vl.c 2006-08-17 19:58:08.532133645 +0100 @@ -163,6 +163,8 @@ int xc_handle; @@ -162,7 +162,7 @@ Index: ioemu/vl.c } } } -@@ -6489,7 +6497,8 @@ +@@ -6507,7 +6515,8 @@ machine->init(ram_size, vga_ram_size, boot_device, ds, fd_filename, snapshot, @@ -174,9 +174,9 @@ Index: ioemu/vl.c if (usb_enabled) { Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-09 21:32:24.454868404 +0100 -+++ ioemu/vl.h 2006-08-09 21:32:24.728837501 +0100 -@@ -575,7 +575,7 @@ +--- ioemu.orig/vl.h 2006-08-17 19:58:08.257164042 +0100 ++++ ioemu/vl.h 2006-08-17 19:58:08.532133645 +0100 +@@ -576,7 +576,7 @@ int boot_device, DisplayState *ds, const char **fd_filename, int snapshot, const char *kernel_filename, const char *kernel_cmdline, diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/hypervisor-pit --- a/tools/ioemu/patches/hypervisor-pit Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/hypervisor-pit Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/Makefile.target Index: ioemu/Makefile.target =================================================================== ---- ioemu.orig/Makefile.target 2006-08-06 02:22:26.380544784 +0100 -+++ ioemu/Makefile.target 2006-08-06 02:23:23.059226607 +0100 +--- ioemu.orig/Makefile.target 2006-08-17 19:49:33.813030472 +0100 ++++ ioemu/Makefile.target 2006-08-17 19:49:50.228216099 +0100 @@ -354,7 +354,7 @@ ifeq ($(TARGET_BASE_ARCH), i386) # Hardware support @@ -13,8 +13,8 @@ Index: 
ioemu/Makefile.target DEFINES += -DHAS_AUDIO Index: ioemu/hw/pc.c =================================================================== ---- ioemu.orig/hw/pc.c 2006-08-06 02:22:35.588518336 +0100 -+++ ioemu/hw/pc.c 2006-08-06 02:23:07.875919141 +0100 +--- ioemu.orig/hw/pc.c 2006-08-17 19:49:35.507843144 +0100 ++++ ioemu/hw/pc.c 2006-08-17 19:49:50.229215988 +0100 @@ -38,7 +38,9 @@ static fdctrl_t *floppy_controller; @@ -38,8 +38,8 @@ Index: ioemu/hw/pc.c pic_set_alt_irq_func(isa_pic, ioapic_set_irq, ioapic); Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-06 02:23:05.216215627 +0100 -+++ ioemu/vl.c 2006-08-06 02:23:07.878918807 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:49:48.566399780 +0100 ++++ ioemu/vl.c 2006-08-17 19:49:50.231215767 +0100 @@ -5570,6 +5570,7 @@ #ifdef HAS_AUDIO diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/ide-hd-multithread --- a/tools/ioemu/patches/ide-hd-multithread Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/ide-hd-multithread Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/hw/ide.c Index: ioemu/hw/ide.c =================================================================== ---- ioemu.orig/hw/ide.c 2006-08-06 02:03:50.520919718 +0100 -+++ ioemu/hw/ide.c 2006-08-06 02:23:41.153209614 +0100 +--- ioemu.orig/hw/ide.c 2006-08-17 19:37:36.267534285 +0100 ++++ ioemu/hw/ide.c 2006-08-17 19:49:57.830375828 +0100 @@ -22,6 +22,7 @@ * THE SOFTWARE. 
*/ diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/ioemu-ia64 --- a/tools/ioemu/patches/ioemu-ia64 Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/ioemu-ia64 Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/hw/iommu.c Index: ioemu/hw/iommu.c =================================================================== ---- ioemu.orig/hw/iommu.c 2006-08-15 13:51:31.425498388 +0100 -+++ ioemu/hw/iommu.c 2006-08-15 13:51:35.834011166 +0100 +--- ioemu.orig/hw/iommu.c 2006-08-17 19:37:36.791476068 +0100 ++++ ioemu/hw/iommu.c 2006-08-17 19:48:27.357375720 +0100 @@ -82,7 +82,11 @@ #define IOPTE_VALID 0x00000002 /* IOPTE is valid */ #define IOPTE_WAZ 0x00000001 /* Write as zeros */ @@ -16,8 +16,8 @@ Index: ioemu/hw/iommu.c Index: ioemu/cpu-all.h =================================================================== ---- ioemu.orig/cpu-all.h 2006-08-15 13:51:35.772018017 +0100 -+++ ioemu/cpu-all.h 2006-08-15 13:51:35.835011055 +0100 +--- ioemu.orig/cpu-all.h 2006-08-17 19:37:36.791476068 +0100 ++++ ioemu/cpu-all.h 2006-08-17 19:48:27.358375609 +0100 @@ -835,6 +835,31 @@ :"=m" (*(volatile long *)addr) :"dIr" (nr)); @@ -52,21 +52,21 @@ Index: ioemu/cpu-all.h /* memory API */ Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-15 13:51:35.824012271 +0100 -+++ ioemu/vl.c 2006-08-15 13:51:46.770802425 +0100 -@@ -6140,6 +6140,11 @@ - /* init the memory */ - phys_ram_size = ram_size + vga_ram_size + bios_size; +--- ioemu.orig/vl.c 2006-08-17 19:47:08.538087284 +0100 ++++ ioemu/vl.c 2006-08-17 19:57:50.666108706 +0100 +@@ -6144,6 +6144,11 @@ + + xc_handle = xc_interface_open(); +#if defined (__ia64__) + if (ram_size > MMIO_START) -+ ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. */ ++ ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. 
*/ +#endif + - #ifdef CONFIG_DM - nr_pages = ram_size/PAGE_SIZE; -@@ -6151,6 +6156,7 @@ + tmp_nr_pages = nr_pages; + +@@ -6161,6 +6166,7 @@ exit(-1); } @@ -74,7 +74,7 @@ Index: ioemu/vl.c if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) { fprintf(logfile, "xc_get_pfn_list returned error %d\n", errno); exit(-1); -@@ -6173,6 +6179,41 @@ +@@ -6191,6 +6197,41 @@ free(page_array); @@ -100,9 +100,9 @@ Index: ioemu/vl.c + } + + if (ram_size > MMIO_START) { -+ for (i = 0 ; i < MEM_G >> PAGE_SHIFT; i++) -+ page_array[MMIO_START >> PAGE_SHIFT + i] = -+ page_array[IO_PAGE_START >> PAGE_SHIFT + 1]; ++ for (i = 0 ; i < MEM_G >> PAGE_SHIFT; i++) ++ page_array[MMIO_START >> PAGE_SHIFT + i] = ++ page_array[IO_PAGE_START >> PAGE_SHIFT + 1]; + } + + phys_ram_base = xc_map_foreign_batch(xc_handle, domid, @@ -118,8 +118,8 @@ Index: ioemu/vl.c phys_ram_base = qemu_vmalloc(phys_ram_size); Index: ioemu/target-i386-dm/exec-dm.c =================================================================== ---- ioemu.orig/target-i386-dm/exec-dm.c 2006-08-15 13:51:35.705025421 +0100 -+++ ioemu/target-i386-dm/exec-dm.c 2006-08-15 13:51:51.987225890 +0100 +--- ioemu.orig/target-i386-dm/exec-dm.c 2006-08-17 19:37:36.792475957 +0100 ++++ ioemu/target-i386-dm/exec-dm.c 2006-08-17 19:48:27.361375278 +0100 @@ -341,6 +341,23 @@ return io_mem_read[io_index >> IO_MEM_SHIFT]; } @@ -156,8 +156,8 @@ Index: ioemu/target-i386-dm/exec-dm.c if (io_index) { Index: ioemu/exec-all.h =================================================================== ---- ioemu.orig/exec-all.h 2006-08-15 13:51:35.682027963 +0100 -+++ ioemu/exec-all.h 2006-08-15 13:51:35.839010613 +0100 +--- ioemu.orig/exec-all.h 2006-08-17 19:37:36.791476068 +0100 ++++ ioemu/exec-all.h 2006-08-17 19:48:27.362375167 +0100 @@ -462,12 +462,13 @@ } #endif @@ -177,8 +177,8 @@ Index: ioemu/exec-all.h Index: ioemu/target-i386-dm/cpu.h =================================================================== ---- ioemu.orig/target-i386-dm/cpu.h 
2006-08-15 13:51:35.704025531 +0100 -+++ ioemu/target-i386-dm/cpu.h 2006-08-15 13:51:35.839010613 +0100 +--- ioemu.orig/target-i386-dm/cpu.h 2006-08-17 19:37:36.792475957 +0100 ++++ ioemu/target-i386-dm/cpu.h 2006-08-17 19:48:27.362375167 +0100 @@ -80,7 +80,11 @@ /* helper2.c */ int main_loop(void); @@ -194,7 +194,7 @@ Index: ioemu/ia64_intrinsic.h Index: ioemu/ia64_intrinsic.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/ia64_intrinsic.h 2006-08-15 13:51:35.840010502 +0100 ++++ ioemu/ia64_intrinsic.h 2006-08-17 19:48:27.363375057 +0100 @@ -0,0 +1,276 @@ +#ifndef IA64_INTRINSIC_H +#define IA64_INTRINSIC_H diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/qemu-allow-disable-sdl --- a/tools/ioemu/patches/qemu-allow-disable-sdl Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/qemu-allow-disable-sdl Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/configure Index: ioemu/configure =================================================================== ---- ioemu.orig/configure 2006-08-06 02:15:01.771108621 +0100 -+++ ioemu/configure 2006-08-06 02:42:26.213918476 +0100 +--- ioemu.orig/configure 2006-08-17 19:37:35.772589281 +0100 ++++ ioemu/configure 2006-08-17 19:50:24.735401975 +0100 @@ -228,8 +228,6 @@ ;; --enable-cocoa) cocoa="yes" ; coreaudio="yes" ; sdl="no" diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/qemu-fix-memset-args --- a/tools/ioemu/patches/qemu-fix-memset-args Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/qemu-fix-memset-args Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/audio/audio.c Index: ioemu/audio/audio.c =================================================================== ---- ioemu.orig/audio/audio.c 2006-08-06 02:03:50.013976165 +0100 -+++ ioemu/audio/audio.c 2006-08-06 02:42:28.991609008 +0100 +--- ioemu.orig/audio/audio.c 2006-08-17 19:37:35.755591169 +0100 ++++ ioemu/audio/audio.c 2006-08-17 
19:50:26.867166346 +0100 @@ -605,11 +605,11 @@ } diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/qemu-fix-write-to-disk-synchronous --- a/tools/ioemu/patches/qemu-fix-write-to-disk-synchronous Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/qemu-fix-write-to-disk-synchronous Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/block-vmdk.c Index: ioemu/block-vmdk.c =================================================================== ---- ioemu.orig/block-vmdk.c 2006-08-06 02:03:45.756450226 +0100 -+++ ioemu/block-vmdk.c 2006-08-06 02:42:31.884286728 +0100 +--- ioemu.orig/block-vmdk.c 2006-08-17 19:37:35.737593169 +0100 ++++ ioemu/block-vmdk.c 2006-08-17 19:50:28.884943317 +0100 @@ -96,7 +96,7 @@ uint32_t magic; int l1_size; @@ -13,8 +13,8 @@ Index: ioemu/block-vmdk.c if (fd < 0) Index: ioemu/block-qcow.c =================================================================== ---- ioemu.orig/block-qcow.c 2006-08-06 02:03:45.754450449 +0100 -+++ ioemu/block-qcow.c 2006-08-06 02:42:31.885286616 +0100 +--- ioemu.orig/block-qcow.c 2006-08-17 19:37:35.737593169 +0100 ++++ ioemu/block-qcow.c 2006-08-17 19:50:28.885943206 +0100 @@ -95,7 +95,7 @@ int fd, len, i, shift; QCowHeader header; @@ -26,8 +26,8 @@ Index: ioemu/block-qcow.c if (fd < 0) Index: ioemu/block-bochs.c =================================================================== ---- ioemu.orig/block-bochs.c 2006-08-06 01:55:02.225741179 +0100 -+++ ioemu/block-bochs.c 2006-08-06 02:42:31.885286616 +0100 +--- ioemu.orig/block-bochs.c 2006-08-17 19:37:35.737593169 +0100 ++++ ioemu/block-bochs.c 2006-08-17 19:50:28.885943206 +0100 @@ -91,7 +91,7 @@ int fd, i; struct bochs_header bochs; @@ -39,8 +39,8 @@ Index: ioemu/block-bochs.c if (fd < 0) Index: ioemu/block.c =================================================================== ---- ioemu.orig/block.c 2006-08-06 02:42:18.880735483 +0100 -+++ ioemu/block.c 2006-08-06 02:42:31.886286505 +0100 +--- ioemu.orig/block.c 2006-08-17 19:50:18.872050063 +0100 
++++ ioemu/block.c 2006-08-17 19:50:28.885943206 +0100 @@ -685,7 +685,7 @@ int rv; #endif @@ -52,8 +52,8 @@ Index: ioemu/block.c if (fd < 0) Index: ioemu/block-cow.c =================================================================== ---- ioemu.orig/block-cow.c 2006-08-06 02:03:45.751450783 +0100 -+++ ioemu/block-cow.c 2006-08-06 02:42:31.886286505 +0100 +--- ioemu.orig/block-cow.c 2006-08-17 19:37:35.738593058 +0100 ++++ ioemu/block-cow.c 2006-08-17 19:50:28.886943095 +0100 @@ -69,7 +69,7 @@ struct cow_header_v2 cow_header; int64_t size; @@ -65,8 +65,8 @@ Index: ioemu/block-cow.c if (fd < 0) Index: ioemu/block-cloop.c =================================================================== ---- ioemu.orig/block-cloop.c 2006-08-06 01:55:02.226741067 +0100 -+++ ioemu/block-cloop.c 2006-08-06 02:42:31.886286505 +0100 +--- ioemu.orig/block-cloop.c 2006-08-17 19:37:35.737593169 +0100 ++++ ioemu/block-cloop.c 2006-08-17 19:50:28.886943095 +0100 @@ -55,7 +55,7 @@ BDRVCloopState *s = bs->opaque; uint32_t offsets_size,max_compressed_block_size=1,i; diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/serial-non-block --- a/tools/ioemu/patches/serial-non-block Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/serial-non-block Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/vl.c Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-06 02:23:29.827472129 +0100 -+++ ioemu/vl.c 2006-08-06 02:23:36.856688561 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:49:52.162002356 +0100 ++++ ioemu/vl.c 2006-08-17 19:49:56.273547905 +0100 @@ -1175,19 +1175,34 @@ static int unix_write(int fd, const uint8_t *buf, int len1) diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/series --- a/tools/ioemu/patches/series Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/series Sun Aug 20 11:08:45 2006 -0400 @@ -41,3 +41,5 @@ qemu-fix-memset-args qemu-fix-memset-args qemu-fix-write-to-disk-synchronous 
xen-support-buffered-ioreqs +qemu-daemonize +xen-platform-device diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/shadow-vram --- a/tools/ioemu/patches/shadow-vram Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/shadow-vram Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/hw/vga.c Index: ioemu/hw/vga.c =================================================================== ---- ioemu.orig/hw/vga.c 2006-08-06 02:23:29.824472464 +0100 -+++ ioemu/hw/vga.c 2006-08-06 02:23:33.873021159 +0100 +--- ioemu.orig/hw/vga.c 2006-08-17 19:49:52.159002688 +0100 ++++ ioemu/hw/vga.c 2006-08-17 19:49:54.575735565 +0100 @@ -1359,6 +1359,105 @@ } } @@ -137,8 +137,8 @@ Index: ioemu/hw/vga.c s->vram_size = vga_ram_size; Index: ioemu/hw/vga_int.h =================================================================== ---- ioemu.orig/hw/vga_int.h 2006-08-06 02:23:29.824472464 +0100 -+++ ioemu/hw/vga_int.h 2006-08-06 02:23:33.874021048 +0100 +--- ioemu.orig/hw/vga_int.h 2006-08-17 19:49:52.159002688 +0100 ++++ ioemu/hw/vga_int.h 2006-08-17 19:49:54.575735565 +0100 @@ -79,6 +79,7 @@ #define VGA_STATE_COMMON \ diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/shared-vram --- a/tools/ioemu/patches/shared-vram Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/shared-vram Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/hw/cirrus_vga.c Index: ioemu/hw/cirrus_vga.c =================================================================== ---- ioemu.orig/hw/cirrus_vga.c 2006-08-06 01:55:02.822674718 +0100 -+++ ioemu/hw/cirrus_vga.c 2006-08-06 02:23:29.822472686 +0100 +--- ioemu.orig/hw/cirrus_vga.c 2006-08-17 19:37:36.372522620 +0100 ++++ ioemu/hw/cirrus_vga.c 2006-08-17 19:49:52.157002909 +0100 @@ -28,6 +28,9 @@ */ #include "vl.h" @@ -176,8 +176,8 @@ Index: ioemu/hw/cirrus_vga.c } Index: ioemu/hw/pc.c =================================================================== ---- ioemu.orig/hw/pc.c 2006-08-06 02:23:07.875919141 +0100 -+++ ioemu/hw/pc.c 2006-08-06 
02:23:29.823472575 +0100 +--- ioemu.orig/hw/pc.c 2006-08-17 19:49:50.229215988 +0100 ++++ ioemu/hw/pc.c 2006-08-17 19:49:52.158002799 +0100 @@ -790,14 +790,14 @@ if (cirrus_vga_enabled) { if (pci_enabled) { @@ -198,8 +198,8 @@ Index: ioemu/hw/pc.c Index: ioemu/hw/vga.c =================================================================== ---- ioemu.orig/hw/vga.c 2006-08-06 02:22:46.606290142 +0100 -+++ ioemu/hw/vga.c 2006-08-06 02:23:29.824472464 +0100 +--- ioemu.orig/hw/vga.c 2006-08-17 19:49:37.764593706 +0100 ++++ ioemu/hw/vga.c 2006-08-17 19:49:52.159002688 +0100 @@ -1858,6 +1858,7 @@ /* TODO: add vbe support if enabled */ } @@ -251,8 +251,8 @@ Index: ioemu/hw/vga.c Index: ioemu/hw/vga_int.h =================================================================== ---- ioemu.orig/hw/vga_int.h 2006-08-06 02:14:09.797902638 +0100 -+++ ioemu/hw/vga_int.h 2006-08-06 02:23:29.824472464 +0100 +--- ioemu.orig/hw/vga_int.h 2006-08-17 19:37:36.372522620 +0100 ++++ ioemu/hw/vga_int.h 2006-08-17 19:49:52.159002688 +0100 @@ -169,5 +169,6 @@ unsigned int color0, unsigned int color1, unsigned int color_xor); @@ -262,8 +262,8 @@ Index: ioemu/hw/vga_int.h extern const uint8_t gr_mask[16]; Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-06 02:23:07.878918807 +0100 -+++ ioemu/vl.c 2006-08-06 02:23:29.827472129 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:49:50.231215767 +0100 ++++ ioemu/vl.c 2006-08-17 19:49:52.162002356 +0100 @@ -5693,6 +5693,78 @@ #define MAX_NET_CLIENTS 32 @@ -345,9 +345,9 @@ Index: ioemu/vl.c #ifdef CONFIG_GDBSTUB Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-06 02:23:02.271543880 +0100 -+++ ioemu/vl.h 2006-08-06 02:23:29.828472018 +0100 -@@ -144,6 +144,13 @@ +--- ioemu.orig/vl.h 2006-08-17 19:49:44.492850031 +0100 ++++ ioemu/vl.h 2006-08-17 19:49:52.163002246 +0100 +@@ -145,6 +145,13 @@ void main_loop_wait(int timeout); diff -r 
96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/support-xm-console --- a/tools/ioemu/patches/support-xm-console Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/support-xm-console Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/vl.c Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-06 02:22:59.254880158 +0100 -+++ ioemu/vl.c 2006-08-06 02:23:05.216215627 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:49:40.119333436 +0100 ++++ ioemu/vl.c 2006-08-17 19:49:48.566399780 +0100 @@ -1536,26 +1536,65 @@ return chr; } diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-access-monitor-vt --- a/tools/ioemu/patches/vnc-access-monitor-vt Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/vnc-access-monitor-vt Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/vnc.c Index: ioemu/vnc.c =================================================================== ---- ioemu.orig/vnc.c 2006-08-07 17:44:33.084748631 +0100 -+++ ioemu/vnc.c 2006-08-07 17:44:33.224733389 +0100 +--- ioemu.orig/vnc.c 2006-08-17 19:50:14.623519661 +0100 ++++ ioemu/vnc.c 2006-08-17 19:50:15.956372339 +0100 @@ -32,6 +32,10 @@ #include "vnc_keysym.h" #include "keymaps.c" diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-cleanup --- a/tools/ioemu/patches/vnc-cleanup Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/vnc-cleanup Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/vnc.c Index: ioemu/vnc.c =================================================================== ---- ioemu.orig/vnc.c 2006-08-07 17:42:21.888055419 +0100 -+++ ioemu/vnc.c 2006-08-07 17:42:28.001363557 +0100 +--- ioemu.orig/vnc.c 2006-08-17 19:37:36.091553839 +0100 ++++ ioemu/vnc.c 2006-08-17 19:50:10.313996001 +0100 @@ -143,13 +143,16 @@ static void vnc_dpy_update(DisplayState *ds, int x, int y, int w, int h) { @@ -65,8 +65,8 @@ Index: ioemu/vnc.c static void vnc_timer_init(VncState *vs) Index: ioemu/vl.c 
=================================================================== ---- ioemu.orig/vl.c 2006-08-07 17:42:27.807385506 +0100 -+++ ioemu/vl.c 2006-08-07 17:42:28.004363230 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:50:02.410869542 +0100 ++++ ioemu/vl.c 2006-08-17 19:50:10.316995669 +0100 @@ -5120,10 +5120,10 @@ /* XXX: better handling of removal */ for(ioh = first_io_handler; ioh != NULL; ioh = ioh_next) { diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-display-find-unused --- a/tools/ioemu/patches/vnc-display-find-unused Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/vnc-display-find-unused Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/vnc.c Index: ioemu/vnc.c =================================================================== ---- ioemu.orig/vnc.c 2006-08-09 14:44:44.721942535 +0100 -+++ ioemu/vnc.c 2006-08-09 14:52:37.262165292 +0100 +--- ioemu.orig/vnc.c 2006-08-17 19:50:15.956372339 +0100 ++++ ioemu/vnc.c 2006-08-17 19:50:17.083247783 +0100 @@ -1183,7 +1183,7 @@ } } @@ -50,8 +50,8 @@ Index: ioemu/vnc.c int vnc_start_viewer(int port) Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-09 14:44:44.721942535 +0100 -+++ ioemu/vl.c 2006-08-09 14:52:06.783905832 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:50:13.152682236 +0100 ++++ ioemu/vl.c 2006-08-17 19:50:17.086247452 +0100 @@ -121,6 +121,7 @@ static DisplayState display_state; int nographic; @@ -104,7 +104,7 @@ Index: ioemu/vl.c } } } -@@ -6465,7 +6475,7 @@ +@@ -6483,7 +6493,7 @@ if (nographic) { dumb_display_init(ds); } else if (vnc_display != -1) { @@ -115,9 +115,9 @@ Index: ioemu/vl.c } else { Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-09 14:44:44.721942535 +0100 -+++ ioemu/vl.h 2006-08-09 14:52:06.783905832 +0100 -@@ -784,7 +784,7 @@ +--- ioemu.orig/vl.h 2006-08-17 19:50:13.153682125 +0100 ++++ ioemu/vl.h 2006-08-17 19:50:17.087247341 +0100 +@@ 
-785,7 +785,7 @@ void cocoa_display_init(DisplayState *ds, int full_screen); /* vnc.c */ diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-fixes --- a/tools/ioemu/patches/vnc-fixes Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/vnc-fixes Sun Aug 20 11:08:45 2006 -0400 @@ -1,8 +1,8 @@ Index: ioemu/vl.c Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-07 17:42:28.004363230 +0100 -+++ ioemu/vl.c 2006-08-07 17:43:16.361100898 +0100 -@@ -6516,8 +6516,10 @@ +--- ioemu.orig/vl.c 2006-08-17 19:50:10.316995669 +0100 ++++ ioemu/vl.c 2006-08-17 19:50:12.100798502 +0100 +@@ -6534,8 +6534,10 @@ } } @@ -17,8 +17,8 @@ Index: ioemu/vl.c if (use_gdbstub) { Index: ioemu/vnc.c =================================================================== ---- ioemu.orig/vnc.c 2006-08-07 17:42:28.001363557 +0100 -+++ ioemu/vnc.c 2006-08-07 17:43:33.593225293 +0100 +--- ioemu.orig/vnc.c 2006-08-17 19:50:10.313996001 +0100 ++++ ioemu/vnc.c 2006-08-17 19:50:12.101798392 +0100 @@ -3,6 +3,7 @@ * * Copyright (C) 2006 Anthony Liguori <anthony@xxxxxxxxxxxxx> @@ -524,9 +524,9 @@ Index: ioemu/vnc.c } Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-07 17:42:27.807385506 +0100 -+++ ioemu/vl.h 2006-08-07 17:43:16.361100898 +0100 -@@ -318,6 +318,7 @@ +--- ioemu.orig/vl.h 2006-08-17 19:50:02.411869432 +0100 ++++ ioemu/vl.h 2006-08-17 19:50:12.102798281 +0100 +@@ -319,6 +319,7 @@ int is_graphic_console(void); CharDriverState *text_console_init(DisplayState *ds); void console_select(unsigned int index); diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-start-vncviewer --- a/tools/ioemu/patches/vnc-start-vncviewer Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/vnc-start-vncviewer Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/vnc.c Index: ioemu/vnc.c =================================================================== ---- 
ioemu.orig/vnc.c 2006-08-07 17:44:32.852773890 +0100 -+++ ioemu/vnc.c 2006-08-07 17:44:32.915767031 +0100 +--- ioemu.orig/vnc.c 2006-08-17 19:50:12.101798392 +0100 ++++ ioemu/vnc.c 2006-08-17 19:50:13.149682567 +0100 @@ -1175,3 +1175,25 @@ vnc_dpy_resize(vs->ds, 640, 400); @@ -30,8 +30,8 @@ Index: ioemu/vnc.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-07 17:44:32.851773999 +0100 -+++ ioemu/vl.c 2006-08-07 17:44:32.918766704 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:50:12.100798502 +0100 ++++ ioemu/vl.c 2006-08-17 19:50:13.152682236 +0100 @@ -120,6 +120,7 @@ int bios_size; static DisplayState display_state; @@ -82,7 +82,7 @@ Index: ioemu/vl.c } } } -@@ -6458,6 +6466,8 @@ +@@ -6476,6 +6484,8 @@ dumb_display_init(ds); } else if (vnc_display != -1) { vnc_display_init(ds, vnc_display); @@ -93,9 +93,9 @@ Index: ioemu/vl.c sdl_display_init(ds, full_screen); Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-07 17:44:32.853773781 +0100 -+++ ioemu/vl.h 2006-08-07 17:44:32.919766595 +0100 -@@ -785,6 +785,7 @@ +--- ioemu.orig/vl.h 2006-08-17 19:50:12.102798281 +0100 ++++ ioemu/vl.h 2006-08-17 19:50:13.153682125 +0100 +@@ -786,6 +786,7 @@ /* vnc.c */ void vnc_display_init(DisplayState *ds, int display); diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-title-domain-name --- a/tools/ioemu/patches/vnc-title-domain-name Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/vnc-title-domain-name Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/vnc.c Index: ioemu/vnc.c =================================================================== ---- ioemu.orig/vnc.c 2006-08-07 17:44:32.915767031 +0100 -+++ ioemu/vnc.c 2006-08-07 17:44:33.084748631 +0100 +--- ioemu.orig/vnc.c 2006-08-17 19:50:13.149682567 +0100 ++++ ioemu/vnc.c 2006-08-17 19:50:14.623519661 +0100 @@ -1014,6 +1014,7 @@ static int protocol_client_init(VncState *vs, 
char *data, size_t len) diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/xen-mm --- a/tools/ioemu/patches/xen-mm Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/xen-mm Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/hw/pc.c Index: ioemu/hw/pc.c =================================================================== ---- ioemu.orig/hw/pc.c 2006-08-06 02:15:39.708879311 +0100 -+++ ioemu/hw/pc.c 2006-08-06 02:18:18.875135656 +0100 +--- ioemu.orig/hw/pc.c 2006-08-17 19:36:00.588166019 +0100 ++++ ioemu/hw/pc.c 2006-08-17 19:37:36.704485734 +0100 @@ -646,7 +646,9 @@ } @@ -25,8 +25,8 @@ Index: ioemu/hw/pc.c isa_bios_size = bios_size; Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-06 02:18:12.550840673 +0100 -+++ ioemu/vl.c 2006-08-06 02:18:45.608155528 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:36:00.667157242 +0100 ++++ ioemu/vl.c 2006-08-17 19:47:08.538087284 +0100 @@ -158,6 +158,8 @@ int acpi_enabled = 1; int fd_bootchk = 1; @@ -40,7 +40,7 @@ Index: ioemu/vl.c QEMUMachine *machine; char usb_devices[MAX_USB_CMDLINE][128]; int usb_devices_index; -+ unsigned long nr_pages; ++ unsigned long nr_pages, tmp_nr_pages, shared_page_nr; + xen_pfn_t *page_array; + extern void *shared_page; @@ -60,16 +60,26 @@ Index: ioemu/vl.c break; case QEMU_OPTION_l: { -@@ -6133,12 +6140,49 @@ +@@ -6133,12 +6140,67 @@ /* init the memory */ phys_ram_size = ram_size + vga_ram_size + bios_size; +#ifdef CONFIG_DM + -+ nr_pages = ram_size/PAGE_SIZE; + xc_handle = xc_interface_open(); + -+ page_array = (xen_pfn_t *)malloc(nr_pages * sizeof(xen_pfn_t)); ++ nr_pages = ram_size/PAGE_SIZE; ++ tmp_nr_pages = nr_pages; ++ ++#if defined(__i386__) || defined(__x86_64__) ++ if (ram_size > HVM_BELOW_4G_RAM_END) { ++ tmp_nr_pages += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; ++ shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1; ++ } else ++ shared_page_nr = nr_pages - 1; ++#endif ++ ++ page_array = (xen_pfn_t 
*)malloc(tmp_nr_pages * sizeof(xen_pfn_t)); + if (page_array == NULL) { + fprintf(logfile, "malloc returned error %d\n", errno); + exit(-1); @@ -80,20 +90,28 @@ Index: ioemu/vl.c + exit(-1); + } + ++ if (ram_size > HVM_BELOW_4G_RAM_END) ++ for (i = 0; i < nr_pages - (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT); i++) ++ page_array[tmp_nr_pages - 1 - i] = page_array[nr_pages - 1 - i]; ++ + phys_ram_base = xc_map_foreign_batch(xc_handle, domid, + PROT_READ|PROT_WRITE, page_array, -+ nr_pages - 1); -+ if (phys_ram_base == 0) { -+ fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno); ++ tmp_nr_pages); ++ if (phys_ram_base == NULL) { ++ fprintf(logfile, "batch map guest memory returned error %d\n", errno); + exit(-1); + } + + shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, + PROT_READ|PROT_WRITE, -+ page_array[nr_pages - 1]); ++ page_array[shared_page_nr]); ++ if (shared_page == NULL) { ++ fprintf(logfile, "map shared IO page returned error %d\n", errno); ++ exit(-1); ++ } + -+ fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", nr_pages - 1, -+ (uint64_t)(page_array[nr_pages - 1])); ++ fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", ++ shared_page_nr, (uint64_t)(page_array[shared_page_nr])); + + free(page_array); + @@ -110,3 +128,28 @@ Index: ioemu/vl.c /* we always create the cdrom drive, even if no disk is there */ bdrv_init(); if (cdrom_index >= 0) { +Index: ioemu/hw/piix_pci.c +=================================================================== +--- ioemu.orig/hw/piix_pci.c 2006-08-17 19:37:36.189542951 +0100 ++++ ioemu/hw/piix_pci.c 2006-08-17 19:38:05.806252180 +0100 +@@ -399,7 +399,7 @@ + uint8_t elcr[2]; + + pci_bios_io_addr = 0xc000; +- pci_bios_mem_addr = 0xf0000000; ++ pci_bios_mem_addr = HVM_BELOW_4G_MMIO_START; + + /* activate IRQ mappings */ + elcr[0] = 0x00; +Index: ioemu/vl.h +=================================================================== +--- ioemu.orig/vl.h 2006-08-17 19:37:36.529505177 +0100 ++++ 
ioemu/vl.h 2006-08-17 19:47:32.680418959 +0100 +@@ -39,6 +39,7 @@ + #include <sys/stat.h> + #include "xenctrl.h" + #include "xs.h" ++#include <xen/hvm/e820.h> + + #ifndef O_LARGEFILE + #define O_LARGEFILE 0 diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/xen-support-buffered-ioreqs --- a/tools/ioemu/patches/xen-support-buffered-ioreqs Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/xen-support-buffered-ioreqs Sun Aug 20 11:08:45 2006 -0400 @@ -1,38 +1,38 @@ Index: ioemu/vl.c Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-09 15:04:25.583508863 +0100 -+++ ioemu/vl.c 2006-08-09 15:04:26.034465993 +0100 +--- ioemu.orig/vl.c 2006-08-17 19:50:22.277673633 +0100 ++++ ioemu/vl.c 2006-08-17 19:55:21.878556486 +0100 @@ -5838,6 +5838,7 @@ - unsigned long nr_pages; + unsigned long nr_pages, tmp_nr_pages, shared_page_nr; xen_pfn_t *page_array; extern void *shared_page; + extern void *buffered_io_page; char qemu_dm_logfilename[64]; -@@ -6388,12 +6389,17 @@ - - phys_ram_base = xc_map_foreign_batch(xc_handle, domid, - PROT_READ|PROT_WRITE, page_array, -- nr_pages - 1); -+ nr_pages - 3); - if (phys_ram_base == 0) { - fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno); - exit(-1); - } +@@ -6419,6 +6420,18 @@ + fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", + shared_page_nr, (uint64_t)(page_array[shared_page_nr])); + /* not yet add for IA64 */ + buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, -+ PROT_READ|PROT_WRITE, -+ page_array[nr_pages - 3]); ++ PROT_READ|PROT_WRITE, ++ page_array[shared_page_nr - 2]); ++ if (buffered_io_page == NULL) { ++ fprintf(logfile, "map buffered IO page returned error %d\n", errno); ++ exit(-1); ++ } + - shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, - PROT_READ|PROT_WRITE, - page_array[nr_pages - 1]); ++ fprintf(logfile, "buffered io page at pfn:%lx, mfn: %"PRIx64"\n", ++ shared_page_nr - 2, 
(uint64_t)(page_array[shared_page_nr - 2])); ++ + free(page_array); + + #elif defined(__ia64__) Index: ioemu/target-i386-dm/helper2.c =================================================================== ---- ioemu.orig/target-i386-dm/helper2.c 2006-08-09 15:04:24.105649313 +0100 -+++ ioemu/target-i386-dm/helper2.c 2006-08-09 15:04:26.040465422 +0100 +--- ioemu.orig/target-i386-dm/helper2.c 2006-08-17 19:49:44.491850141 +0100 ++++ ioemu/target-i386-dm/helper2.c 2006-08-17 19:50:41.490549986 +0100 @@ -76,6 +76,10 @@ shared_iopage_t *shared_page = NULL; diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/xenstore-block-device-config --- a/tools/ioemu/patches/xenstore-block-device-config Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/xenstore-block-device-config Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/Makefile.target Index: ioemu/Makefile.target =================================================================== ---- ioemu.orig/Makefile.target 2006-08-09 21:32:24.915816410 +0100 -+++ ioemu/Makefile.target 2006-08-09 21:32:25.500750429 +0100 +--- ioemu.orig/Makefile.target 2006-08-17 19:50:02.405870095 +0100 ++++ ioemu/Makefile.target 2006-08-17 19:50:18.866050726 +0100 @@ -358,6 +358,7 @@ VL_OBJS+= cirrus_vga.o mixeng.o parallel.o acpi.o piix_pci.o VL_OBJS+= usb-uhci.o @@ -13,7 +13,7 @@ Index: ioemu/xenstore.c Index: ioemu/xenstore.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ ioemu/xenstore.c 2006-08-09 21:32:25.501750317 +0100 ++++ ioemu/xenstore.c 2006-08-17 19:50:18.867050616 +0100 @@ -0,0 +1,187 @@ +/* + * This file is subject to the terms and conditions of the GNU General @@ -204,8 +204,8 @@ Index: ioemu/xenstore.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-09 21:32:25.438757422 +0100 -+++ ioemu/vl.c 2006-08-09 21:32:25.504749978 +0100 +--- ioemu.orig/vl.c 2006-08-17 
19:50:17.086247452 +0100 ++++ ioemu/vl.c 2006-08-17 19:50:18.870050284 +0100 @@ -5243,9 +5243,11 @@ "Standard options:\n" "-M machine select emulated machine (-M ? for list)\n" @@ -359,7 +359,7 @@ Index: ioemu/vl.c setvbuf(stdout, NULL, _IOLBF, 0); -@@ -6417,6 +6448,7 @@ +@@ -6435,6 +6466,7 @@ #endif /* !CONFIG_DM */ @@ -367,7 +367,7 @@ Index: ioemu/vl.c /* we always create the cdrom drive, even if no disk is there */ bdrv_init(); if (cdrom_index >= 0) { -@@ -6443,6 +6475,7 @@ +@@ -6461,6 +6493,7 @@ } } } @@ -375,7 +375,7 @@ Index: ioemu/vl.c /* we always create at least one floppy disk */ fd_table[0] = bdrv_new("fda"); -@@ -6521,6 +6554,8 @@ +@@ -6539,6 +6572,8 @@ } } @@ -386,8 +386,8 @@ Index: ioemu/vl.c kernel_filename, kernel_cmdline, initrd_filename, Index: ioemu/monitor.c =================================================================== ---- ioemu.orig/monitor.c 2006-08-09 21:32:24.238892765 +0100 -+++ ioemu/monitor.c 2006-08-09 21:32:25.505749865 +0100 +--- ioemu.orig/monitor.c 2006-08-17 19:49:44.491850141 +0100 ++++ ioemu/monitor.c 2006-08-17 19:50:18.871050174 +0100 @@ -24,6 +24,7 @@ #include "vl.h" #include "disas.h" @@ -416,8 +416,8 @@ Index: ioemu/monitor.c int i; Index: ioemu/block.c =================================================================== ---- ioemu.orig/block.c 2006-08-09 21:32:18.339558126 +0100 -+++ ioemu/block.c 2006-08-09 21:32:25.506749753 +0100 +--- ioemu.orig/block.c 2006-08-17 19:37:35.865578948 +0100 ++++ ioemu/block.c 2006-08-17 19:50:18.872050063 +0100 @@ -758,6 +758,7 @@ static void raw_close(BlockDriverState *bs) { @@ -428,9 +428,9 @@ Index: ioemu/block.c Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-09 21:32:25.439757309 +0100 -+++ ioemu/vl.h 2006-08-09 21:32:25.506749753 +0100 -@@ -1187,6 +1187,8 @@ +--- ioemu.orig/vl.h 2006-08-17 19:50:17.087247341 +0100 ++++ ioemu/vl.h 2006-08-17 19:50:18.872050063 +0100 +@@ -1188,6 +1188,8 @@ void 
term_print_help(void); void monitor_readline(const char *prompt, int is_password, char *buf, int buf_size); @@ -439,7 +439,7 @@ Index: ioemu/vl.h /* readline.c */ typedef void ReadLineFunc(void *opaque, const char *str); -@@ -1199,6 +1201,13 @@ +@@ -1200,6 +1202,13 @@ void readline_start(const char *prompt, int is_password, ReadLineFunc *readline_func, void *opaque); @@ -455,8 +455,8 @@ Index: ioemu/vl.h extern char domain_name[]; Index: ioemu/hw/ide.c =================================================================== ---- ioemu.orig/hw/ide.c 2006-08-09 21:32:24.658845396 +0100 -+++ ioemu/hw/ide.c 2006-08-09 21:32:25.508749527 +0100 +--- ioemu.orig/hw/ide.c 2006-08-17 19:49:57.830375828 +0100 ++++ ioemu/hw/ide.c 2006-08-17 19:50:18.874049842 +0100 @@ -1158,6 +1158,7 @@ } else { ide_atapi_cmd_error(s, SENSE_NOT_READY, diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/xenstore-write-vnc-port --- a/tools/ioemu/patches/xenstore-write-vnc-port Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/patches/xenstore-write-vnc-port Sun Aug 20 11:08:45 2006 -0400 @@ -1,7 +1,7 @@ Index: ioemu/xenstore.c Index: ioemu/xenstore.c =================================================================== ---- ioemu.orig/xenstore.c 2006-08-09 21:32:25.501750317 +0100 -+++ ioemu/xenstore.c 2006-08-09 21:32:25.706727195 +0100 +--- ioemu.orig/xenstore.c 2006-08-17 19:50:18.867050616 +0100 ++++ ioemu/xenstore.c 2006-08-17 19:50:22.274673964 +0100 @@ -185,3 +185,31 @@ free(image); free(vec); @@ -36,9 +36,9 @@ Index: ioemu/xenstore.c +} Index: ioemu/vl.c =================================================================== ---- ioemu.orig/vl.c 2006-08-09 21:32:25.504749978 +0100 -+++ ioemu/vl.c 2006-08-09 21:32:25.709726857 +0100 -@@ -6511,6 +6511,7 @@ +--- ioemu.orig/vl.c 2006-08-17 19:50:18.870050284 +0100 ++++ ioemu/vl.c 2006-08-17 19:50:22.277673633 +0100 +@@ -6529,6 +6529,7 @@ vnc_display = vnc_display_init(ds, vnc_display, vncunused); if (vncviewer) vnc_start_viewer(vnc_display); @@ 
-48,9 +48,9 @@ Index: ioemu/vl.c sdl_display_init(ds, full_screen); Index: ioemu/vl.h =================================================================== ---- ioemu.orig/vl.h 2006-08-09 21:32:25.506749753 +0100 -+++ ioemu/vl.h 2006-08-09 21:32:25.710726744 +0100 -@@ -1206,6 +1206,7 @@ +--- ioemu.orig/vl.h 2006-08-17 19:50:18.872050063 +0100 ++++ ioemu/vl.h 2006-08-17 19:50:22.278673522 +0100 +@@ -1207,6 +1207,7 @@ int xenstore_fd(void); void xenstore_process_event(void *opaque); void xenstore_check_new_media_present(int timeout); diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/vl.c Sun Aug 20 11:08:45 2006 -0400 @@ -5835,7 +5835,7 @@ int main(int argc, char **argv) QEMUMachine *machine; char usb_devices[MAX_USB_CMDLINE][128]; int usb_devices_index; - unsigned long nr_pages; + unsigned long nr_pages, tmp_nr_pages, shared_page_nr; xen_pfn_t *page_array; extern void *shared_page; extern void *buffered_io_page; @@ -6036,10 +6036,11 @@ int main(int argc, char **argv) } break; case QEMU_OPTION_nographic: - pstrcpy(monitor_device, sizeof(monitor_device), "stdio"); + if(!strcmp(monitor_device, "vc")) + pstrcpy(monitor_device, sizeof(monitor_device), "null"); if(!strcmp(serial_devices[0], "vc")) pstrcpy(serial_devices[0], sizeof(serial_devices[0]), - "stdio"); + "null"); nographic = 1; break; case QEMU_OPTION_kernel: @@ -6365,17 +6366,27 @@ int main(int argc, char **argv) /* init the memory */ phys_ram_size = ram_size + vga_ram_size + bios_size; +#ifdef CONFIG_DM + + xc_handle = xc_interface_open(); + #if defined (__ia64__) if (ram_size > MMIO_START) - ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. */ -#endif - -#ifdef CONFIG_DM + ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. 
*/ +#endif nr_pages = ram_size/PAGE_SIZE; - xc_handle = xc_interface_open(); - - page_array = (xen_pfn_t *)malloc(nr_pages * sizeof(xen_pfn_t)); + tmp_nr_pages = nr_pages; + +#if defined(__i386__) || defined(__x86_64__) + if (ram_size > HVM_BELOW_4G_RAM_END) { + tmp_nr_pages += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; + shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1; + } else + shared_page_nr = nr_pages - 1; +#endif + + page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t)); if (page_array == NULL) { fprintf(logfile, "malloc returned error %d\n", errno); exit(-1); @@ -6387,25 +6398,40 @@ int main(int argc, char **argv) exit(-1); } + if (ram_size > HVM_BELOW_4G_RAM_END) + for (i = 0; i < nr_pages - (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT); i++) + page_array[tmp_nr_pages - 1 - i] = page_array[nr_pages - 1 - i]; + phys_ram_base = xc_map_foreign_batch(xc_handle, domid, PROT_READ|PROT_WRITE, page_array, - nr_pages - 3); - if (phys_ram_base == 0) { - fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno); + tmp_nr_pages); + if (phys_ram_base == NULL) { + fprintf(logfile, "batch map guest memory returned error %d\n", errno); exit(-1); } + + shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, + PROT_READ|PROT_WRITE, + page_array[shared_page_nr]); + if (shared_page == NULL) { + fprintf(logfile, "map shared IO page returned error %d\n", errno); + exit(-1); + } + + fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", + shared_page_nr, (uint64_t)(page_array[shared_page_nr])); /* not yet add for IA64 */ buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, - PROT_READ|PROT_WRITE, - page_array[nr_pages - 3]); - - shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, - PROT_READ|PROT_WRITE, - page_array[nr_pages - 1]); - - fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", nr_pages - 1, - (uint64_t)(page_array[nr_pages - 1])); + PROT_READ|PROT_WRITE, + page_array[shared_page_nr - 2]); + if 
(buffered_io_page == NULL) { + fprintf(logfile, "map buffered IO page returned error %d\n", errno); + exit(-1); + } + + fprintf(logfile, "buffered io page at pfn:%lx, mfn: %"PRIx64"\n", + shared_page_nr - 2, (uint64_t)(page_array[shared_page_nr - 2])); free(page_array); @@ -6431,9 +6457,9 @@ int main(int argc, char **argv) } if (ram_size > MMIO_START) { - for (i = 0 ; i < MEM_G >> PAGE_SHIFT; i++) - page_array[MMIO_START >> PAGE_SHIFT + i] = - page_array[IO_PAGE_START >> PAGE_SHIFT + 1]; + for (i = 0 ; i < MEM_G >> PAGE_SHIFT; i++) + page_array[MMIO_START >> PAGE_SHIFT + i] = + page_array[IO_PAGE_START >> PAGE_SHIFT + 1]; } phys_ram_base = xc_map_foreign_batch(xc_handle, domid, diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/ioemu/vl.h Sun Aug 20 11:08:45 2006 -0400 @@ -39,6 +39,7 @@ #include <sys/stat.h> #include "xenctrl.h" #include "xs.h" +#include <xen/hvm/e820.h> #ifndef O_LARGEFILE #define O_LARGEFILE 0 @@ -1208,6 +1209,9 @@ void xenstore_check_new_media_present(in void xenstore_check_new_media_present(int timeout); void xenstore_write_vncport(int vnc_display); +/* xen_platform.c */ +void pci_xen_platform_init(PCIBus *bus); + void kqemu_record_dump(void); diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libaio/src/Makefile --- a/tools/libaio/src/Makefile Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/libaio/src/Makefile Sun Aug 20 11:08:45 2006 -0400 @@ -1,3 +1,6 @@ prefix=/usr +XEN_ROOT = ../../.. 
+include $(XEN_ROOT)/tools/Rules.mk + prefix=/usr includedir=$(prefix)/include libdir=$(prefix)/lib @@ -44,8 +47,8 @@ libaio_sobjs := $(patsubst %.c,%.os,$(li libaio.a: $(libaio_objs) rm -f libaio.a - ar r libaio.a $^ - ranlib libaio.a + $(AR) r libaio.a $^ + $(RANLIB) libaio.a $(libname): $(libaio_sobjs) libaio.map $(CC) $(SO_CFLAGS) -Wl,--version-script=libaio.map -Wl,-soname=$(soname) -o $@ $(libaio_sobjs) $(LINK_FLAGS) diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/libxc/xc_domain.c Sun Aug 20 11:08:45 2006 -0400 @@ -213,21 +213,28 @@ int xc_shadow_control(int xc_handle, unsigned int sop, unsigned long *dirty_bitmap, unsigned long pages, - xc_shadow_control_stats_t *stats ) + unsigned long *mb, + uint32_t mode, + xc_shadow_control_stats_t *stats) { int rc; DECLARE_DOM0_OP; op.cmd = DOM0_SHADOW_CONTROL; op.u.shadow_control.domain = (domid_t)domid; op.u.shadow_control.op = sop; + op.u.shadow_control.pages = pages; + op.u.shadow_control.mb = mb ? *mb : 0; + op.u.shadow_control.mode = mode; set_xen_guest_handle(op.u.shadow_control.dirty_bitmap, dirty_bitmap); - op.u.shadow_control.pages = pages; rc = do_dom0_op(xc_handle, &op); if ( stats ) memcpy(stats, &op.u.shadow_control.stats, sizeof(xc_shadow_control_stats_t)); + + if ( mb ) + *mb = op.u.shadow_control.mb; return (rc == 0) ? 
op.u.shadow_control.pages : rc; } @@ -391,7 +398,7 @@ int xc_domain_memory_populate_physmap(in if ( err > 0 ) { - DPRINTF("Failed deallocation for dom %d: %ld pages order %d\n", + DPRINTF("Failed allocation for dom %d: %ld pages order %d\n", domid, nr_extents, extent_order); errno = EBUSY; err = -1; diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/libxc/xc_hvm_build.c Sun Aug 20 11:08:45 2006 -0400 @@ -54,9 +54,19 @@ static void build_e820map(void *e820_pag { struct e820entry *e820entry = (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET); + unsigned long long extra_mem_size = 0; unsigned char nr_map = 0; - /* XXX: Doesn't work for > 4GB yet */ + /* + * physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved + * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END + * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above. + */ + if ( mem_size > HVM_BELOW_4G_RAM_END ) { + extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END; + mem_size = HVM_BELOW_4G_RAM_END; + } + e820entry[nr_map].addr = 0x0; e820entry[nr_map].size = 0x9F000; e820entry[nr_map].type = E820_RAM; @@ -77,53 +87,86 @@ static void build_e820map(void *e820_pag e820entry[nr_map].type = E820_RESERVED; nr_map++; -#define STATIC_PAGES 3 - /* 3 static pages: - * - ioreq buffer. - * - xenstore. - * - shared_page. - */ +/* ACPI data: 10 pages. */ +#define ACPI_DATA_PAGES 10 +/* ACPI NVS: 3 pages. */ +#define ACPI_NVS_PAGES 3 +/* buffered io page. */ +#define BUFFERED_IO_PAGES 1 +/* xenstore page. */ +#define XENSTORE_PAGES 1 +/* shared io page. 
*/ +#define SHARED_IO_PAGES 1 +/* totally 16 static pages are reserved in E820 table */ /* Most of the ram goes here */ e820entry[nr_map].addr = 0x100000; - e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES * PAGE_SIZE; + e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * + (ACPI_DATA_PAGES + + ACPI_NVS_PAGES + + BUFFERED_IO_PAGES + + XENSTORE_PAGES + + SHARED_IO_PAGES); e820entry[nr_map].type = E820_RAM; nr_map++; /* Statically allocated special pages */ + /* For ACPI data */ + e820entry[nr_map].addr = mem_size - PAGE_SIZE * + (ACPI_DATA_PAGES + + ACPI_NVS_PAGES + + BUFFERED_IO_PAGES + + XENSTORE_PAGES + + SHARED_IO_PAGES); + e820entry[nr_map].size = PAGE_SIZE * ACPI_DATA_PAGES; + e820entry[nr_map].type = E820_ACPI; + nr_map++; + + /* For ACPI NVS */ + e820entry[nr_map].addr = mem_size - PAGE_SIZE * + (ACPI_NVS_PAGES + + BUFFERED_IO_PAGES + + XENSTORE_PAGES + + SHARED_IO_PAGES); + e820entry[nr_map].size = PAGE_SIZE * ACPI_NVS_PAGES; + e820entry[nr_map].type = E820_NVS; + nr_map++; + /* For buffered IO requests */ - e820entry[nr_map].addr = mem_size - 3 * PAGE_SIZE; - e820entry[nr_map].size = PAGE_SIZE; + e820entry[nr_map].addr = mem_size - PAGE_SIZE * + (BUFFERED_IO_PAGES + + XENSTORE_PAGES + + SHARED_IO_PAGES); + e820entry[nr_map].size = PAGE_SIZE * BUFFERED_IO_PAGES; e820entry[nr_map].type = E820_BUFFERED_IO; nr_map++; /* For xenstore */ - e820entry[nr_map].addr = mem_size - 2 * PAGE_SIZE; - e820entry[nr_map].size = PAGE_SIZE; + e820entry[nr_map].addr = mem_size - PAGE_SIZE * + (XENSTORE_PAGES + + SHARED_IO_PAGES); + e820entry[nr_map].size = PAGE_SIZE * XENSTORE_PAGES; e820entry[nr_map].type = E820_XENSTORE; nr_map++; /* Shared ioreq_t page */ - e820entry[nr_map].addr = mem_size - PAGE_SIZE; - e820entry[nr_map].size = PAGE_SIZE; + e820entry[nr_map].addr = mem_size - PAGE_SIZE * SHARED_IO_PAGES; + e820entry[nr_map].size = PAGE_SIZE * SHARED_IO_PAGES; e820entry[nr_map].type = E820_SHARED_PAGE; - nr_map++; - - e820entry[nr_map].addr = mem_size; - 
e820entry[nr_map].size = 0x3 * PAGE_SIZE; - e820entry[nr_map].type = E820_NVS; - nr_map++; - - e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE; - e820entry[nr_map].size = 0xA * PAGE_SIZE; - e820entry[nr_map].type = E820_ACPI; nr_map++; e820entry[nr_map].addr = 0xFEC00000; e820entry[nr_map].size = 0x1400000; e820entry[nr_map].type = E820_IO; nr_map++; + + if ( extra_mem_size ) { + e820entry[nr_map].addr = (1ULL << 32); + e820entry[nr_map].size = extra_mem_size; + e820entry[nr_map].type = E820_RAM; + nr_map++; + } *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map; } @@ -147,7 +190,7 @@ static void set_hvm_info_checksum(struct */ static int set_hvm_info(int xc_handle, uint32_t dom, xen_pfn_t *pfn_list, unsigned int vcpus, - unsigned int acpi, unsigned int apic) + unsigned int acpi) { char *va_map; struct hvm_info_table *va_hvm; @@ -170,8 +213,6 @@ static int set_hvm_info(int xc_handle, u set_hvm_info_checksum(va_hvm); munmap(va_map, PAGE_SIZE); - - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic); return 0; } @@ -200,11 +241,7 @@ static int setup_guest(int xc_handle, struct domain_setup_info dsi; uint64_t v_end; - unsigned long shared_page_frame = 0; - shared_iopage_t *sp; - - unsigned long ioreq_buffer_frame = 0; - void *ioreq_buffer_page; + unsigned long shared_page_nr; memset(&dsi, 0, sizeof(struct domain_setup_info)); @@ -256,23 +293,38 @@ static int setup_guest(int xc_handle, /* Write the machine->phys table entries. */ for ( count = 0; count < nr_pages; count++ ) { + unsigned long gpfn_count_skip; + ptr = (unsigned long long)page_array[count] << PAGE_SHIFT; + + gpfn_count_skip = 0; + + /* + * physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved + * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END + * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above. 
+ */ + if ( count >= (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) ) + gpfn_count_skip = HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; + if ( xc_add_mmu_update(xc_handle, mmu, - ptr | MMU_MACHPHYS_UPDATE, count) ) + ptr | MMU_MACHPHYS_UPDATE, + count + gpfn_count_skip) ) goto error_out; } - if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) ) + if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi) ) { ERROR("Couldn't set hvm info for HVM guest.\n"); goto error_out; } xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic); if ( (e820_page = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, - page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 ) + page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == NULL ) goto error_out; memset(e820_page, 0, PAGE_SIZE); build_e820map(e820_page, v_end); @@ -281,7 +333,7 @@ static int setup_guest(int xc_handle, /* shared_info page starts its life empty. */ if ( (shared_info = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, - shared_info_frame)) == 0 ) + shared_info_frame)) == NULL ) goto error_out; memset(shared_info, 0, PAGE_SIZE); /* Mask all upcalls... 
*/ @@ -289,32 +341,25 @@ static int setup_guest(int xc_handle, shared_info->vcpu_info[i].evtchn_upcall_mask = 1; munmap(shared_info, PAGE_SIZE); + if ( v_end > HVM_BELOW_4G_RAM_END ) + shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1; + else + shared_page_nr = (v_end >> PAGE_SHIFT) - 1; + + *store_mfn = page_array[shared_page_nr - 1]; + + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, *store_mfn); + xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); + /* Paranoia */ - shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1]; - if ( (sp = (shared_iopage_t *) xc_map_foreign_range( - xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, - shared_page_frame)) == 0 ) - goto error_out; - memset(sp, 0, PAGE_SIZE); - munmap(sp, PAGE_SIZE); + /* clean the shared IO requests page */ + if ( xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr]) ) + goto error_out; /* clean the buffered IO requests page */ - ioreq_buffer_frame = page_array[(v_end >> PAGE_SHIFT) - 3]; - ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ | PROT_WRITE, - ioreq_buffer_frame); - - if ( ioreq_buffer_page == NULL ) - goto error_out; - - memset(ioreq_buffer_page, 0, PAGE_SIZE); - - munmap(ioreq_buffer_page, PAGE_SIZE); - - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, (v_end >> PAGE_SHIFT) - 2); - xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn); - - *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2]; + if ( xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr - 2]) ) + goto error_out; + if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) ) goto error_out; @@ -395,6 +440,19 @@ static int xc_hvm_build_internal(int xc_ PERROR("Could not get info on domain"); goto error_out; } + + /* HVM domains must be put into shadow2 mode at the start of day */ + if ( xc_shadow_control(xc_handle, domid, DOM0_SHADOW2_CONTROL_OP_ENABLE, + NULL, 0, NULL, + DOM0_SHADOW2_CONTROL_FLAG_ENABLE + | 
DOM0_SHADOW2_CONTROL_FLAG_REFCOUNT + | DOM0_SHADOW2_CONTROL_FLAG_TRANSLATE + | DOM0_SHADOW2_CONTROL_FLAG_EXTERNAL, + NULL) ) + { + PERROR("Could not enable shadow paging for domain.\n"); + goto error_out; + } memset(ctxt, 0, sizeof(*ctxt)); diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/libxc/xc_linux_build.c Sun Aug 20 11:08:45 2006 -0400 @@ -972,7 +972,7 @@ static int setup_guest(int xc_handle, /* Enable shadow translate mode */ if ( xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE, - NULL, 0, NULL) < 0 ) + NULL, 0, NULL, 0, NULL) < 0 ) { PERROR("Could not enable translation mode"); goto error_out; diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/libxc/xc_linux_save.c Sun Aug 20 11:08:45 2006 -0400 @@ -338,13 +338,13 @@ static int analysis_phase(int xc_handle, int i; xc_shadow_control(xc_handle, domid, DOM0_SHADOW_CONTROL_OP_CLEAN, - arr, max_pfn, NULL); + arr, max_pfn, NULL, 0, NULL); DPRINTF("#Flush\n"); for ( i = 0; i < 40; i++ ) { usleep(50000); now = llgettimeofday(); xc_shadow_control(xc_handle, domid, DOM0_SHADOW_CONTROL_OP_PEEK, - NULL, 0, &stats); + NULL, 0, NULL, 0, &stats); DPRINTF("now= %lld faults= %" PRId32 " dirty= %" PRId32 " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n", @@ -727,7 +727,7 @@ int xc_linux_save(int xc_handle, int io_ if (xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY, - NULL, 0, NULL ) < 0) { + NULL, 0, NULL, 0, NULL) < 0) { ERR("Couldn't enable shadow mode"); goto out; } @@ -879,7 +879,7 @@ int xc_linux_save(int xc_handle, int io_ but this is fast enough for the moment. 
*/ if (!last_iter && xc_shadow_control( xc_handle, dom, DOM0_SHADOW_CONTROL_OP_PEEK, - to_skip, max_pfn, NULL) != max_pfn) { + to_skip, max_pfn, NULL, 0, NULL) != max_pfn) { ERR("Error peeking shadow bitmap"); goto out; } @@ -1084,8 +1084,9 @@ int xc_linux_save(int xc_handle, int io_ (unsigned long)ctxt.user_regs.edx); } - if (xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_CLEAN, - to_send, max_pfn, &stats ) != max_pfn) { + if (xc_shadow_control(xc_handle, dom, + DOM0_SHADOW_CONTROL_OP_CLEAN, to_send, + max_pfn, NULL, 0, &stats) != max_pfn) { ERR("Error flushing shadow PT"); goto out; } @@ -1174,8 +1175,9 @@ int xc_linux_save(int xc_handle, int io_ out: if (live) { - if(xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF, - NULL, 0, NULL ) < 0) { + if(xc_shadow_control(xc_handle, dom, + DOM0_SHADOW_CONTROL_OP_OFF, + NULL, 0, NULL, 0, NULL) < 0) { DPRINTF("Warning - couldn't disable shadow mode"); } } diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/libxc/xenctrl.h Sun Aug 20 11:08:45 2006 -0400 @@ -323,6 +323,8 @@ int xc_shadow_control(int xc_handle, unsigned int sop, unsigned long *dirty_bitmap, unsigned long pages, + unsigned long *mb, + uint32_t mode, xc_shadow_control_stats_t *stats); int xc_bvtsched_global_set(int xc_handle, diff -r 96d6f9cfed6e -r 4cffec02b478 tools/misc/xc_shadow.c --- a/tools/misc/xc_shadow.c Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/misc/xc_shadow.c Sun Aug 20 11:08:45 2006 -0400 @@ -60,6 +60,8 @@ int main(int argc, char *argv[]) mode, NULL, 0, + NULL, + 0, NULL) < 0 ) { fprintf(stderr, "Error reseting performance counters: %d (%s)\n", diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/python/xen/lowlevel/xc/xc.c Sun Aug 20 11:08:45 2006 -0400 @@ -672,6 +672,59 @@ static PyObject *pyxc_sedf_domain_get(Xc "weight", weight); } +static 
PyObject *pyxc_shadow_control(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + + uint32_t dom; + int op=0; + + static char *kwd_list[] = { "dom", "op", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|i", kwd_list, + &dom, &op) ) + return NULL; + + if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0, NULL, 0, NULL) + < 0 ) + return PyErr_SetFromErrno(xc_error); + + Py_INCREF(zero); + return zero; +} + +static PyObject *pyxc_shadow_mem_control(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + int op; + uint32_t dom; + int mbarg = -1; + unsigned long mb; + + static char *kwd_list[] = { "dom", "mb", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|i", kwd_list, + &dom, &mbarg) ) + return NULL; + + if ( mbarg < 0 ) + op = DOM0_SHADOW2_CONTROL_OP_GET_ALLOCATION; + else + { + mb = mbarg; + op = DOM0_SHADOW2_CONTROL_OP_SET_ALLOCATION; + } + if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0, &mb, 0, NULL) < 0 ) + return PyErr_SetFromErrno(xc_error); + + mbarg = mb; + return Py_BuildValue("i", mbarg); +} + static PyObject *pyxc_sched_credit_domain_set(XcObject *self, PyObject *args, PyObject *kwds) @@ -1121,6 +1174,22 @@ static PyMethodDef pyxc_methods[] = { "Get information about the Xen host\n" "Returns [dict]: information about Xen" " [None]: on failure.\n" }, + + { "shadow_control", + (PyCFunction)pyxc_shadow_control, + METH_VARARGS | METH_KEYWORDS, "\n" + "Set parameter for shadow pagetable interface\n" + " dom [int]: Identifier of domain.\n" + " op [int, 0]: operation\n\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + + { "shadow_mem_control", + (PyCFunction)pyxc_shadow_mem_control, + METH_VARARGS | METH_KEYWORDS, "\n" + "Set or read shadow pagetable memory use\n" + " dom [int]: Identifier of domain.\n" + " mb [int, -1]: MB of shadow memory this domain should have.\n\n" + "Returns: [int] MB of shadow memory in use by this domain.\n" }, { 
"domain_setmaxmem", (PyCFunction)pyxc_domain_setmaxmem, diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/python/xen/xend/XendDomain.py Sun Aug 20 11:08:45 2006 -0400 @@ -532,6 +532,30 @@ class XendDomain: except Exception, ex: raise XendError(str(ex)) + def domain_shadow_control(self, domid, op): + """Shadow page control.""" + dominfo = self.domain_lookup(domid) + try: + return xc.shadow_control(dominfo.getDomid(), op) + except Exception, ex: + raise XendError(str(ex)) + + def domain_shadow_mem_get(self, domid): + """Get shadow pagetable memory allocation.""" + dominfo = self.domain_lookup(domid) + try: + return xc.shadow_mem_control(dominfo.getDomid()) + except Exception, ex: + raise XendError(str(ex)) + + def domain_shadow_mem_set(self, domid, mb): + """Set shadow pagetable memory allocation.""" + dominfo = self.domain_lookup(domid) + try: + return xc.shadow_mem_control(dominfo.getDomid(), mb=mb) + except Exception, ex: + raise XendError(str(ex)) + def domain_sched_credit_get(self, domid): """Get credit scheduler parameters for a domain. """ diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/python/xen/xend/XendDomainInfo.py Sun Aug 20 11:08:45 2006 -0400 @@ -30,6 +30,7 @@ import time import time import threading import os +import math import xen.lowlevel.xc from xen.util import asserts @@ -126,16 +127,17 @@ VM_CONFIG_PARAMS = [ # don't come out of xc in the same form as they are specified in the config # file, so those are handled separately. 
ROUNDTRIPPING_CONFIG_ENTRIES = [ - ('uuid', str), - ('vcpus', int), - ('vcpu_avail', int), - ('cpu_weight', float), - ('memory', int), - ('maxmem', int), - ('bootloader', str), + ('uuid', str), + ('vcpus', int), + ('vcpu_avail', int), + ('cpu_weight', float), + ('memory', int), + ('shadow_memory', int), + ('maxmem', int), + ('bootloader', str), ('bootloader_args', str), - ('features', str), - ('localtime', int), + ('features', str), + ('localtime', int), ] ROUNDTRIPPING_CONFIG_ENTRIES += VM_CONFIG_PARAMS @@ -146,12 +148,13 @@ ROUNDTRIPPING_CONFIG_ENTRIES += VM_CONFI # entries written to the store that cannot be reconfigured on-the-fly. # VM_STORE_ENTRIES = [ - ('uuid', str), - ('vcpus', int), - ('vcpu_avail', int), - ('memory', int), - ('maxmem', int), - ('start_time', float), + ('uuid', str), + ('vcpus', int), + ('vcpu_avail', int), + ('memory', int), + ('shadow_memory', int), + ('maxmem', int), + ('start_time', float), ] VM_STORE_ENTRIES += VM_CONFIG_PARAMS @@ -572,6 +575,7 @@ class XendDomainInfo: defaultInfo('vcpu_avail', lambda: (1 << self.info['vcpus']) - 1) defaultInfo('memory', lambda: 0) + defaultInfo('shadow_memory', lambda: 0) defaultInfo('maxmem', lambda: 0) defaultInfo('bootloader', lambda: None) defaultInfo('bootloader_args', lambda: None) @@ -1276,14 +1280,34 @@ class XendDomainInfo: for v in range(0, self.info['max_vcpu_id']+1): xc.vcpu_setaffinity(self.domid, v, self.info['cpus']) - # set memory limit - maxmem = self.image.getRequiredMemory(self.info['maxmem'] * 1024) - xc.domain_setmaxmem(self.domid, maxmem) - - # initial memory allocation - mem_kb = self.image.getRequiredMemory(self.info['memory'] * 1024) - balloon.free(mem_kb) - xc.domain_memory_increase_reservation(self.domid, mem_kb, 0, 0) + # set domain maxmem in KiB + xc.domain_setmaxmem(self.domid, self.info['maxmem'] * 1024) + + m = self.image.getDomainMemory(self.info['memory'] * 1024) + + # get the domain's shadow memory requirement + sm = 
int(math.ceil(self.image.getDomainShadowMemory(m) / 1024.0)) + if self.info['shadow_memory'] > sm: + sm = self.info['shadow_memory'] + + # Make sure there's enough RAM available for the domain + balloon.free(m + sm * 1024) + + # Set up the shadow memory + sm = xc.shadow_mem_control(self.domid, mb=sm) + self.info['shadow_memory'] = sm + + init_reservation = self.info['memory'] * 1024 + if os.uname()[4] in ('ia64', 'ppc64'): + # Workaround for architectures that don't yet support + # ballooning. + init_reservation = m + # Following line from xiantao.zhang@xxxxxxxxx + # Needed for IA64 until supports ballooning -- okay for PPC64? + xc.domain_setmaxmem(self.domid, m) + + xc.domain_memory_increase_reservation(self.domid, init_reservation, + 0, 0) self.createChannels() @@ -1518,13 +1542,12 @@ class XendDomainInfo: return self.getDeviceController(dev_type).sxpr(devid) - def device_configure(self, dev_config, devid): + def device_configure(self, dev_config): """Configure an existing device. @param dev_config: device configuration - @param devid: device id """ deviceClass = sxp.name(dev_config) - self.reconfigureDevice(deviceClass, devid, dev_config) + self.reconfigureDevice(deviceClass, None, dev_config) def pause(self): diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/XendLogging.py --- a/tools/python/xen/xend/XendLogging.py Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/python/xen/xend/XendLogging.py Sun Aug 20 11:08:45 2006 -0400 @@ -57,7 +57,7 @@ class XendRotatingFileHandler(logging.ha self.setCloseOnExec() def doRollover(self): - logging.handlers.RotatingFileHandler.doRollover() + logging.handlers.RotatingFileHandler.doRollover(self) self.setCloseOnExec() # NB yes accessing 'self.stream' violates OO encapsulation somewhat, diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/python/xen/xend/image.py Sun Aug 20 11:08:45 2006 -0400 @@ -145,6 +145,12 @@ class 
ImageHandler: def getRequiredMemory(self, mem_kb): return mem_kb + + def getDomainShadowMemory(self, mem_kb): + """@return The minimum shadow memory required, in KiB, for a domain + with mem_kb KiB of RAM.""" + # PV domains don't need any shadow memory + return 0 def buildDomain(self): """Build the domain. Define in subclass.""" @@ -372,6 +378,32 @@ class HVMImageHandler(ImageHandler): os.waitpid(self.pid, 0) self.pid = 0 + def getDomainMemory(self, mem_kb): + """@see ImageHandler.getDomainMemory""" + if os.uname()[4] == 'ia64': + page_kb = 16 + # ROM size for guest firmware, ioreq page and xenstore page + extra_pages = 1024 + 2 + else: + page_kb = 4 + # This was derived emperically: + # 2.4 MB overhead per 1024 MB RAM + 8 MB constant + # + 4 to avoid low-memory condition + extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12; + extra_pages = int( math.ceil( extra_mb*1024 / page_kb )) + return mem_kb + extra_pages * page_kb + + def getDomainShadowMemory(self, mem_kb): + """@return The minimum shadow memory required, in KiB, for a domain + with mem_kb KiB of RAM.""" + if os.uname()[4] in ('ia64', 'ppc64'): + # Explicit shadow memory is not a concept + return 0 + else: + # 1MB per vcpu plus 4Kib/Mib of RAM. This is higher than + # the minimum that Xen would allocate if no value were given. 
+ return 1024 * self.vm.getVCpuCount() + mem_kb / 256 + def register_shutdown_watch(self): """ add xen store watch on control/shutdown """ self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown", \ diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/python/xen/xend/server/DevController.py Sun Aug 20 11:08:45 2006 -0400 @@ -206,15 +206,9 @@ class DevController: """ devid = int(devid) - - frontpath = self.frontendPath(devid) - backpath = xstransact.Read(frontpath, "backend") - - if backpath: - xstransact.Write(backpath, 'state', str(xenbusState['Closing'])) - else: - raise VmError("Device %s not connected" % devid) - + + self.writeBackend(devid, 'state', str(xenbusState['Closing'])) + def configurations(self): return map(self.configuration, self.deviceIDs()) @@ -355,6 +349,16 @@ class DevController: return map(int, xstransact.List(fe)) + def writeBackend(self, devid, *args): + frontpath = self.frontendPath(devid) + backpath = xstransact.Read(frontpath, "backend") + + if backpath: + xstransact.Write(backpath, *args) + else: + raise VmError("Device %s not connected" % devid) + + ## private: def addStoreEntries(self, config, devid, backDetails, frontDetails): diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/server/XMLRPCServer.py --- a/tools/python/xen/xend/server/XMLRPCServer.py Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/python/xen/xend/server/XMLRPCServer.py Sun Aug 20 11:08:45 2006 -0400 @@ -24,6 +24,7 @@ from xen.util.xmlrpclib2 import UnixXMLR from xen.xend.XendClient import XML_RPC_SOCKET, ERROR_INVALID_DOMAIN from xen.xend.XendError import * +from xen.xend.XendLogging import log from types import ListType def lookup(domid): @@ -74,7 +75,8 @@ def get_log(): finally: f.close() -methods = ['device_create', 'destroyDevice', 'getDeviceSxprs', +methods = ['device_create', 'device_configure', 'destroyDevice', + 
'getDeviceSxprs', 'setMemoryTarget', 'setName', 'setVCpuCount', 'shutdown', 'send_sysrq', 'getVCPUInfo', 'waitForDevices'] diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/server/blkif.py --- a/tools/python/xen/xend/server/blkif.py Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/python/xen/xend/server/blkif.py Sun Aug 20 11:08:45 2006 -0400 @@ -13,7 +13,7 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #============================================================================ # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> -# Copyright (C) 2005 XenSource Ltd +# Copyright (C) 2005, 2006 XenSource Inc. #============================================================================ @@ -76,6 +76,23 @@ class BlkifController(DevController): return (devid, back, front) + def reconfigureDevice(self, _, config): + """@see DevController.reconfigureDevice""" + (devid, new_back, new_front) = self.getDeviceDetails(config) + + (dev, mode) = self.readBackend(devid, 'dev', 'mode') + dev_type = self.readFrontend(devid, 'device-type') + + if (dev_type == 'cdrom' and new_front['device-type'] == 'cdrom' and + dev == new_back['dev'] and mode == 'r'): + self.writeBackend(devid, + 'type', new_back['type'], + 'params', new_back['params']) + else: + raise VmError('Refusing to reconfigure device %s:%d to %s' % + (self.deviceClass, devid, config)) + + def configuration(self, devid): """@see DevController.configuration""" diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/python/xen/xm/create.py Sun Aug 20 11:08:45 2006 -0400 @@ -157,6 +157,10 @@ gopts.var('maxmem', val='MEMORY', gopts.var('maxmem', val='MEMORY', fn=set_int, default=None, use="Maximum domain memory in MB.") + +gopts.var('shadow_memory', val='MEMORY', + fn=set_int, default=0, + use="Domain shadow memory in MB.") gopts.var('cpu', val='CPU', fn=set_int, default=None, @@ -666,8 +670,9 @@ def 
make_config(vals): if v: config.append([n, v]) - map(add_conf, ['name', 'memory', 'maxmem', 'restart', 'on_poweroff', - 'on_reboot', 'on_crash', 'vcpus', 'features']) + map(add_conf, ['name', 'memory', 'maxmem', 'shadow_memory', + 'restart', 'on_poweroff', 'on_reboot', 'on_crash', + 'vcpus', 'features']) if vals.uuid is not None: config.append(['uuid', vals.uuid]) diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/python/xen/xm/main.py Sun Aug 20 11:08:45 2006 -0400 @@ -113,6 +113,8 @@ block_detach_help = """block-detach <Do or the device name as mounted in the guest""" block_list_help = "block-list <DomId> [--long] List virtual block devices for a domain" +block_configure_help = """block-configure <DomId> <BackDev> <FrontDev> <Mode> + [BackDomId] Change block device configuration""" network_attach_help = """network-attach <DomID> [script=<script>] [ip=<ip>] [mac=<mac>] [bridge=<bridge>] [backend=<backDomID>] Create a new virtual network device """ @@ -199,6 +201,7 @@ device_commands = [ "block-attach", "block-detach", "block-list", + "block-configure", "network-attach", "network-detach", "network-list", @@ -1055,9 +1058,8 @@ def xm_vtpm_list(args): "%(be-path)-30s " % ni) -def xm_block_attach(args): - arg_check(args, 'block-attach', 4, 5) - + +def parse_block_configuration(args): dom = args[0] if args[1].startswith('tap:'): @@ -1087,7 +1089,21 @@ def xm_block_attach(args): traceback.print_exc(limit=1) sys.exit(1) + return (dom, vbd) + + +def xm_block_attach(args): + arg_check(args, 'block-attach', 4, 5) + + (dom, vbd) = parse_block_configuration(args) server.xend.domain.device_create(dom, vbd) + + +def xm_block_configure(args): + arg_check(args, 'block-configure', 4, 5) + + (dom, vbd) = parse_block_configuration(args) + server.xend.domain.device_configure(dom, vbd) def xm_network_attach(args): @@ -1201,6 +1217,7 @@ commands = { "block-attach": xm_block_attach, 
"block-detach": xm_block_detach, "block-list": xm_block_list, + "block-configure": xm_block_configure, # network "network-attach": xm_network_attach, "network-detach": xm_network_detach, diff -r 96d6f9cfed6e -r 4cffec02b478 tools/xenmon/Makefile --- a/tools/xenmon/Makefile Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/xenmon/Makefile Sun Aug 20 11:08:45 2006 -0400 @@ -25,7 +25,7 @@ CFLAGS += -I $(XEN_LIBXC) CFLAGS += -I $(XEN_LIBXC) LDFLAGS += -L $(XEN_LIBXC) -BIN = setmask xenbaked +BIN = xentrace_setmask xenbaked SCRIPTS = xenmon.py .PHONY: all @@ -35,10 +35,10 @@ build: $(BIN) build: $(BIN) .PHONY: install -install: xenbaked setmask +install: build [ -d $(DESTDIR)$(sbindir) ] || $(INSTALL_DIR) $(DESTDIR)$(sbindir) $(INSTALL_PROG) xenbaked $(DESTDIR)$(sbindir)/xenbaked - $(INSTALL_PROG) setmask $(DESTDIR)$(sbindir)/setmask + $(INSTALL_PROG) xentrace_setmask $(DESTDIR)$(sbindir)/xentrace_setmask $(INSTALL_PROG) xenmon.py $(DESTDIR)$(sbindir)/xenmon.py .PHONY: clean @@ -48,5 +48,5 @@ clean: %: %.c Makefile $(CC) $(CFLAGS) $(LDFLAGS) -lxenctrl -o $@ $< - - +xentrace_%: %.c Makefile + $(CC) $(CFLAGS) $(LDFLAGS) -lxenctrl -o $@ $< diff -r 96d6f9cfed6e -r 4cffec02b478 tools/xenstore/Makefile --- a/tools/xenstore/Makefile Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/xenstore/Makefile Sun Aug 20 11:08:45 2006 -0400 @@ -98,7 +98,7 @@ libxenstore.so.$(MAJOR).$(MINOR): xs.opi $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenstore.so.$(MAJOR) -shared -o $@ $^ -lpthread libxenstore.a: xs.o xs_lib.o - ar rcs libxenstore.a $^ + $(AR) rcs libxenstore.a $^ .PHONY: clean clean: testsuite-clean diff -r 96d6f9cfed6e -r 4cffec02b478 tools/xentrace/Makefile --- a/tools/xentrace/Makefile Sun Aug 20 11:07:52 2006 -0400 +++ b/tools/xentrace/Makefile Sun Aug 20 11:08:45 2006 -0400 @@ -14,7 +14,7 @@ HDRS = $(wildcard *.h) HDRS = $(wildcard *.h) OBJS = $(patsubst %.c,%.o,$(wildcard *.c)) -BIN = xentrace setsize +BIN = xentrace xentrace_setsize LIBBIN = SCRIPTS = xentrace_format MAN1 = $(wildcard 
*.1) @@ -58,3 +58,5 @@ clean: %: %.c $(HDRS) Makefile $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxenctrl +xentrace_%: %.c $(HDRS) Makefile + $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxenctrl diff -r 96d6f9cfed6e -r 4cffec02b478 xen/Rules.mk --- a/xen/Rules.mk Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/Rules.mk Sun Aug 20 11:08:45 2006 -0400 @@ -8,6 +8,9 @@ perfc_arrays?= n perfc_arrays?= n crash_debug ?= n +XEN_ROOT=$(BASEDIR)/.. +include $(XEN_ROOT)/Config.mk + # Hardcoded configuration implications and dependencies. # Do this is a neater way if it becomes unwieldy. ifeq ($(debug),y) @@ -16,9 +19,6 @@ ifeq ($(perfc_arrays),y) ifeq ($(perfc_arrays),y) perfc := y endif - -XEN_ROOT=$(BASEDIR)/.. -include $(XEN_ROOT)/Config.mk # Set ARCH/SUBARCH appropriately. override COMPILE_SUBARCH := $(XEN_COMPILE_ARCH) diff -r 96d6f9cfed6e -r 4cffec02b478 xen/acm/acm_core.c --- a/xen/acm/acm_core.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/acm/acm_core.c Sun Aug 20 11:08:45 2006 -0400 @@ -206,7 +206,7 @@ acm_setup(unsigned int *initrdidx, for (i = mbi->mods_count-1; i >= 1; i--) { struct acm_policy_buffer *pol; - char *_policy_start; + char *_policy_start; unsigned long _policy_len; #if defined(__i386__) _policy_start = (char *)(initial_images_start + (mod[i].mod_start-mod[0].mod_start)); @@ -342,7 +342,7 @@ acm_init_domain_ssid(domid_t id, ssidref { printk("%s: ERROR instantiating individual ssids for domain 0x%02x.\n", __func__, subj->domain_id); - acm_free_domain_ssid(ssid); + acm_free_domain_ssid(ssid); put_domain(subj); return ACM_INIT_SSID_ERROR; } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/acm/acm_simple_type_enforcement_hooks.c --- a/xen/acm/acm_simple_type_enforcement_hooks.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/acm/acm_simple_type_enforcement_hooks.c Sun Aug 20 11:08:45 2006 -0400 @@ -86,10 +86,10 @@ int acm_init_ste_policy(void) /* init stats */ atomic_set(&(ste_bin_pol.ec_eval_count), 0); - atomic_set(&(ste_bin_pol.ec_denied_count), 0); + 
atomic_set(&(ste_bin_pol.ec_denied_count), 0); atomic_set(&(ste_bin_pol.ec_cachehit_count), 0); atomic_set(&(ste_bin_pol.gt_eval_count), 0); - atomic_set(&(ste_bin_pol.gt_denied_count), 0); + atomic_set(&(ste_bin_pol.gt_denied_count), 0); atomic_set(&(ste_bin_pol.gt_cachehit_count), 0); return ACM_OK; } @@ -100,7 +100,7 @@ ste_init_domain_ssid(void **ste_ssid, ss ste_init_domain_ssid(void **ste_ssid, ssidref_t ssidref) { int i; - struct ste_ssid *ste_ssidp = xmalloc(struct ste_ssid); + struct ste_ssid *ste_ssidp = xmalloc(struct ste_ssid); traceprintk("%s.\n", __func__); if (ste_ssidp == NULL) @@ -309,7 +309,7 @@ ste_set_policy(u8 *buf, u32 buf_size) sizeof(domaintype_t), ste_buf->ste_max_ssidrefs*ste_buf->ste_max_types); - /* 2. now re-calculate sharing decisions based on running domains; + /* 2. now re-calculate sharing decisions based on running domains; * this can fail if new policy is conflicting with sharing of running domains * now: reject violating new policy; future: adjust sharing through revoking sharing */ if (ste_init_state(ste_buf, (domaintype_t *)ssidrefsbuf)) { @@ -349,11 +349,11 @@ ste_dump_stats(u8 *buf, u16 buf_len) stats.ec_eval_count = htonl(atomic_read(&ste_bin_pol.ec_eval_count)); stats.gt_eval_count = htonl(atomic_read(&ste_bin_pol.gt_eval_count)); stats.ec_denied_count = htonl(atomic_read(&ste_bin_pol.ec_denied_count)); - stats.gt_denied_count = htonl(atomic_read(&ste_bin_pol.gt_denied_count)); + stats.gt_denied_count = htonl(atomic_read(&ste_bin_pol.gt_denied_count)); stats.ec_cachehit_count = htonl(atomic_read(&ste_bin_pol.ec_cachehit_count)); stats.gt_cachehit_count = htonl(atomic_read(&ste_bin_pol.gt_cachehit_count)); - if (buf_len < sizeof(struct acm_ste_stats_buffer)) + if (buf_len < sizeof(struct acm_ste_stats_buffer)) return -ENOMEM; memcpy(buf, &stats, sizeof(struct acm_ste_stats_buffer)); @@ -523,8 +523,8 @@ ste_pre_eventchannel_unbound(domid_t id1 cache_result(subj, obj); ret = ACM_ACCESS_PERMITTED; } else { -
atomic_inc(&ste_bin_pol.ec_denied_count); - ret = ACM_ACCESS_DENIED; + atomic_inc(&ste_bin_pol.ec_denied_count); + ret = ACM_ACCESS_DENIED; } out: if (obj != NULL) @@ -569,8 +569,8 @@ ste_pre_eventchannel_interdomain(domid_t cache_result(subj, obj); ret = ACM_ACCESS_PERMITTED; } else { - atomic_inc(&ste_bin_pol.ec_denied_count); - ret = ACM_ACCESS_DENIED; + atomic_inc(&ste_bin_pol.ec_denied_count); + ret = ACM_ACCESS_DENIED; } out: if (obj != NULL) @@ -599,9 +599,9 @@ ste_pre_grant_map_ref (domid_t id) { cache_result(subj, obj); ret = ACM_ACCESS_PERMITTED; } else { - atomic_inc(&ste_bin_pol.gt_denied_count); + atomic_inc(&ste_bin_pol.gt_denied_count); printkd("%s: ACCESS DENIED!\n", __func__); - ret = ACM_ACCESS_DENIED; + ret = ACM_ACCESS_DENIED; } if (obj != NULL) put_domain(obj); @@ -637,8 +637,8 @@ ste_pre_grant_setup (domid_t id) { cache_result(subj, obj); ret = ACM_ACCESS_PERMITTED; } else { - atomic_inc(&ste_bin_pol.gt_denied_count); - ret = ACM_ACCESS_DENIED; + atomic_inc(&ste_bin_pol.gt_denied_count); + ret = ACM_ACCESS_DENIED; } if (obj != NULL) put_domain(obj); diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/ia64/Rules.mk --- a/xen/arch/ia64/Rules.mk Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/ia64/Rules.mk Sun Aug 20 11:08:45 2006 -0400 @@ -2,6 +2,7 @@ # ia64-specific definitions HAS_ACPI := y +HAS_VGA := y VALIDATE_VT ?= n no_warns ?= n diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/ia64/xen/domain.c Sun Aug 20 11:08:45 2006 -0400 @@ -864,6 +864,7 @@ int construct_dom0(struct domain *d, { int i, rc; start_info_t *si; + dom0_vga_console_info_t *ci; struct vcpu *v = d->vcpu[0]; unsigned long max_pages; @@ -1000,6 +1001,9 @@ int construct_dom0(struct domain *d, //if ( initrd_len != 0 ) // memcpy((void *)vinitrd_start, initrd_start, initrd_len); + BUILD_BUG_ON(sizeof(start_info_t) + sizeof(dom0_vga_console_info_t) + + sizeof(struct ia64_boot_param) > PAGE_SIZE); 
+ /* Set up start info area. */ d->shared_info->arch.start_info_pfn = pstart_info >> PAGE_SHIFT; start_info_page = assign_new_domain_page(d, pstart_info); @@ -1034,7 +1038,8 @@ int construct_dom0(struct domain *d, strncpy((char *)si->cmd_line, dom0_command_line, sizeof(si->cmd_line)); si->cmd_line[sizeof(si->cmd_line)-1] = 0; - bp = (struct ia64_boot_param *)(si + 1); + bp = (struct ia64_boot_param *)((unsigned char *)si + + sizeof(start_info_t)); bp->command_line = pstart_info + offsetof (start_info_t, cmd_line); /* We assume console has reached the last line! */ @@ -1048,6 +1053,16 @@ int construct_dom0(struct domain *d, (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024); bp->initrd_size = ia64_boot_param->initrd_size; + ci = (dom0_vga_console_info_t *)((unsigned char *)si + + sizeof(start_info_t) + + sizeof(struct ia64_boot_param)); + + if (fill_console_start_info(ci)) { + si->console.dom0.info_off = sizeof(start_info_t) + + sizeof(struct ia64_boot_param); + si->console.dom0.info_size = sizeof(dom0_vga_console_info_t); + } + vcpu_init_regs (v); vcpu_regs(v)->r28 = bp_mpa; diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/ia64/xen/mm.c Sun Aug 20 11:08:45 2006 -0400 @@ -1746,6 +1746,11 @@ int get_page_type(struct page_info *page return 1; } +int memory_is_conventional_ram(paddr_t p) +{ + return (efi_mem_type(p) == EFI_CONVENTIONAL_MEMORY); +} + /* * Local variables: * mode: C diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/Makefile Sun Aug 20 11:08:45 2006 -0400 @@ -8,7 +8,6 @@ subdir-$(x86_64) += x86_64 subdir-$(x86_64) += x86_64 obj-y += apic.o -obj-y += audit.o obj-y += bitops.o obj-y += compat.o obj-y += delay.o @@ -41,12 +40,21 @@ obj-y += x86_emulate.o obj-y += x86_emulate.o ifneq ($(pae),n) -obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o 
+obj-$(x86_32) += shadow2-common.o shadow2_g2_on_s3.o shadow2_g3_on_s3.o else -obj-$(x86_32) += shadow32.o +obj-$(x86_32) += shadow2-common.o shadow2_g2_on_s2.o endif -obj-$(x86_64) += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o +obj-$(x86_64) += shadow2-common.o shadow2_g4_on_s4.o shadow2_g3_on_s3.o \ + shadow2_g2_on_s3.o + +guest_levels = $(subst g,,$(filter g%,$(subst ., ,$(subst _, ,$(subst shadow2_,,$(1)))))) +shadow_levels = $(subst s,,$(filter s%,$(subst ., ,$(subst _, ,$(subst shadow2_,,$(1)))))) +shadow2_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1)) \ + -DSHADOW_PAGING_LEVELS=$(call shadow_levels,$(1)) + +shadow2_%.o: shadow2.c $(HDRS) Makefile + $(CC) $(CFLAGS) $(call shadow2_defns,$(@F)) -c $< -o $@ obj-$(crash_debug) += gdbstub.o diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/Rules.mk --- a/xen/arch/x86/Rules.mk Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/Rules.mk Sun Aug 20 11:08:45 2006 -0400 @@ -2,6 +2,7 @@ # x86-specific definitions HAS_ACPI := y +HAS_VGA := y # # If you change any of these configuration options then you must diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/acpi/boot.c --- a/xen/arch/x86/acpi/boot.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/acpi/boot.c Sun Aug 20 11:08:45 2006 -0400 @@ -107,7 +107,7 @@ char *__acpi_map_table(unsigned long phy int idx; if (phys + size < 8 * 1024 * 1024) - return __va(phys); + return __va(phys); offset = phys & (PAGE_SIZE - 1); mapped_size = PAGE_SIZE - offset; diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/apic.c --- a/xen/arch/x86/apic.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/apic.c Sun Aug 20 11:08:45 2006 -0400 @@ -10,8 +10,8 @@ * thanks to Eric Gilmore * and Rolf G. Tews * for testing these extensively. - * Maciej W. Rozycki : Various updates and fixes. - * Mikael Pettersson : Power Management for UP-APIC. + * Maciej W. Rozycki : Various updates and fixes. + * Mikael Pettersson : Power Management for UP-APIC. 
* Pavel Machek and * Mikael Pettersson : PM converted to driver model. */ @@ -166,7 +166,7 @@ void clear_local_APIC(void) apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); #endif v = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (APIC_INTEGRATED(v)) { /* !82489DX */ + if (APIC_INTEGRATED(v)) { /* !82489DX */ if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */ apic_write(APIC_ESR, 0); apic_read(APIC_ESR); @@ -878,9 +878,9 @@ int __init calibrate_APIC_clock(void) ((long)(t2-t1)/LOOPS)%(1000000/HZ)); apic_printk(APIC_VERBOSE, "..... host bus clock speed is " - "%ld.%04ld MHz.\n", - result/(1000000/HZ), - result%(1000000/HZ)); + "%ld.%04ld MHz.\n", + result/(1000000/HZ), + result%(1000000/HZ)); /* set up multipliers for accurate timer code */ bus_freq = result*HZ; diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/boot/x86_32.S --- a/xen/arch/x86/boot/x86_32.S Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/boot/x86_32.S Sun Aug 20 11:08:45 2006 -0400 @@ -7,22 +7,22 @@ #define SECONDARY_CPU_FLAG 0xA5A5A5A5 - .text + .text ENTRY(start) ENTRY(stext) ENTRY(_stext) jmp __start - .align 4 + .align 4 /*** MULTIBOOT HEADER ****/ /* Magic number indicating a Multiboot header. */ - .long 0x1BADB002 - /* Flags to bootloader (see Multiboot spec). */ - .long 0x00000003 - /* Checksum: must be the negated sum of the first two fields. */ - .long -0x1BADB005 + .long 0x1BADB002 + /* Flags to bootloader (see Multiboot spec). */ + .long 0x00000003 + /* Checksum: must be the negated sum of the first two fields. */ + .long -0x1BADB005 not_multiboot_msg: .asciz "ERR: Not a Multiboot bootloader!" @@ -57,8 +57,8 @@ 1: lss stack_start-__PAGE_OFFSE add $(STACK_SIZE-CPUINFO_sizeof-__PAGE_OFFSET),%esp /* Reset EFLAGS (subsumes CLI and CLD). */ - pushl $0 - popf + pushl $0 + popf /* Set up FPU. */ fninit @@ -172,7 +172,7 @@ 1: /* Paging enabled, so we can now je start_secondary /* Call into main C routine. 
This should never return.*/ - call __start_xen + call __start_xen ud2 /* Force a panic (invalid opcode). */ /* This is the default interrupt handler. */ @@ -203,19 +203,19 @@ ENTRY(stack_start) .word 0 idt_descr: - .word 256*8-1 + .word 256*8-1 idt: - .long idt_table + .long idt_table .word 0 gdt_descr: - .word LAST_RESERVED_GDT_BYTE + .word LAST_RESERVED_GDT_BYTE gdt: .long gdt_table - FIRST_RESERVED_GDT_BYTE .word 0 nopaging_gdt_descr: - .word LAST_RESERVED_GDT_BYTE + .word LAST_RESERVED_GDT_BYTE .long gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET .org 0x1000 diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/cpu/amd.c --- a/xen/arch/x86/cpu/amd.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/cpu/amd.c Sun Aug 20 11:08:45 2006 -0400 @@ -280,7 +280,7 @@ static void __init init_amd(struct cpuin set_bit(X86_FEATURE_K8, c->x86_capability); break; case 6: - set_bit(X86_FEATURE_K7, c->x86_capability); + set_bit(X86_FEATURE_K7, c->x86_capability); break; } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/cpu/cyrix.c --- a/xen/arch/x86/cpu/cyrix.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/cpu/cyrix.c Sun Aug 20 11:08:45 2006 -0400 @@ -145,7 +145,7 @@ static void __init set_cx86_inc(void) setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); /* PCR0 -- Performance Control */ /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); + setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/cpu/transmeta.c --- a/xen/arch/x86/cpu/transmeta.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/cpu/transmeta.c Sun Aug 20 11:08:45 2006 -0400 @@ -19,7 +19,7 @@ static void __init init_transmeta(struct max = cpuid_eax(0x80860000); cpu_rev = 0; if ( max >= 0x80860001 ) { - cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); + cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); if (cpu_rev != 0x02000000) { printk(KERN_INFO "CPU: Processor revision 
%u.%u.%u.%u, %u MHz\n", (cpu_rev >> 24) & 0xff, diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/delay.c --- a/xen/arch/x86/delay.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/delay.c Sun Aug 20 11:08:45 2006 -0400 @@ -1,13 +1,13 @@ /* - * Precise Delay Loops for i386 + * Precise Delay Loops for i386 * - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx> + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx> * - * The __delay function must _NOT_ be inlined as its execution time - * depends wildly on alignment on many x86 processors. The additional - * jump magic is needed to get the timing stable on all the CPU's - * we have to worry about. + * The __delay function must _NOT_ be inlined as its execution time + * depends wildly on alignment on many x86 processors. The additional + * jump magic is needed to get the timing stable on all the CPU's + * we have to worry about. */ #include <xen/config.h> diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/dmi_scan.c --- a/xen/arch/x86/dmi_scan.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/dmi_scan.c Sun Aug 20 11:08:45 2006 -0400 @@ -199,11 +199,11 @@ static __init __attribute__((unused)) in static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d) { if (!acpi_force) { - printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); + printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); disable_acpi(); } else { printk(KERN_NOTICE - "Warning: DMI blacklist says broken, but acpi forced\n"); + "Warning: DMI blacklist says broken, but acpi forced\n"); } return 0; } @@ -214,12 +214,12 @@ static __init __attribute__((unused)) in static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d) { if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); + printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); disable_acpi(); - acpi_ht = 
1; + acpi_ht = 1; } else { printk(KERN_NOTICE - "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); + "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); } return 0; } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/dom0_ops.c --- a/xen/arch/x86/dom0_ops.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/dom0_ops.c Sun Aug 20 11:08:45 2006 -0400 @@ -84,12 +84,12 @@ long arch_do_dom0_op(struct dom0_op *op, case DOM0_SHADOW_CONTROL: { - struct domain *d; + struct domain *d; ret = -ESRCH; d = find_domain_by_id(op->u.shadow_control.domain); if ( d != NULL ) { - ret = shadow_mode_control(d, &op->u.shadow_control); + ret = shadow2_control_op(d, &op->u.shadow_control, u_dom0_op); put_domain(d); copy_to_guest(u_dom0_op, op, 1); } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/domain.c Sun Aug 20 11:08:45 2006 -0400 @@ -134,13 +134,6 @@ struct vcpu *alloc_vcpu_struct(struct do v->arch.perdomain_ptes = d->arch.mm_perdomain_pt + (vcpu_id << GDT_LDT_VCPU_SHIFT); - v->arch.guest_vtable = __linear_l2_table; - v->arch.shadow_vtable = __shadow_linear_l2_table; -#if defined(__x86_64__) - v->arch.guest_vl3table = __linear_l3_table; - v->arch.guest_vl4table = __linear_l4_table; -#endif - pae_l3_cache_init(&v->arch.pae_l3_cache); return v; @@ -155,9 +148,7 @@ int arch_domain_create(struct domain *d) { l1_pgentry_t gdt_l1e; int vcpuid, pdpt_order; -#ifdef __x86_64__ int i; -#endif pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t)); d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order); @@ -202,8 +193,12 @@ int arch_domain_create(struct domain *d) #endif /* __x86_64__ */ - shadow_lock_init(d); - INIT_LIST_HEAD(&d->arch.free_shadow_frames); + shadow2_lock_init(d); + for ( i = 0; i <= SHADOW2_MAX_ORDER; i++ ) + INIT_LIST_HEAD(&d->arch.shadow2_freelists[i]); + INIT_LIST_HEAD(&d->arch.shadow2_p2m_freelist); + INIT_LIST_HEAD(&d->arch.shadow2_p2m_inuse); + 
INIT_LIST_HEAD(&d->arch.shadow2_toplevel_shadows); if ( !is_idle_domain(d) ) { @@ -234,6 +229,8 @@ int arch_domain_create(struct domain *d) void arch_domain_destroy(struct domain *d) { + shadow2_final_teardown(d); + free_xenheap_pages( d->arch.mm_perdomain_pt, get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t))); @@ -328,31 +325,35 @@ int arch_set_info_guest( if ( !hvm_initialize_guest_resources(v) ) return -EINVAL; } - else if ( shadow_mode_refcounts(d) ) - { - if ( !get_page(mfn_to_page(cr3_pfn), d) ) + else + { + if ( !get_page_and_type(mfn_to_page(cr3_pfn), d, + PGT_base_page_table) ) { destroy_gdt(v); return -EINVAL; } - } - else - { - if ( !get_page_and_type(mfn_to_page(cr3_pfn), d, - PGT_base_page_table) ) - { - destroy_gdt(v); - return -EINVAL; - } - } - - update_pagetables(v); + } + + /* Shadow2: make sure the domain has enough shadow memory to + * boot another vcpu */ + if ( shadow2_mode_enabled(d) + && d->arch.shadow2_total_pages < shadow2_min_acceptable_pages(d) ) + { + destroy_gdt(v); + return -ENOMEM; + } if ( v->vcpu_id == 0 ) update_domain_wallclock_time(d); /* Don't redo final setup */ set_bit(_VCPUF_initialised, &v->vcpu_flags); + + if ( shadow2_mode_enabled(d) ) + shadow2_update_paging_modes(v); + + update_cr3(v); return 0; } @@ -555,7 +556,8 @@ static void load_segments(struct vcpu *n n->vcpu_info->evtchn_upcall_mask = 1; regs->entry_vector = TRAP_syscall; - regs->rflags &= 0xFFFCBEFFUL; + regs->rflags &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF| + X86_EFLAGS_NT|X86_EFLAGS_TF); regs->ss = __GUEST_SS; regs->rsp = (unsigned long)(rsp-11); regs->cs = __GUEST_CS; @@ -669,7 +671,6 @@ static void __context_switch(void) loaddebug(&n->arch.guest_context, 6); loaddebug(&n->arch.guest_context, 7); } - n->arch.ctxt_switch_to(n); } @@ -927,29 +928,34 @@ void domain_relinquish_resources(struct /* Drop the in-use references to page-table bases. 
*/ for_each_vcpu ( d, v ) { - if ( (pfn = pagetable_get_pfn(v->arch.guest_table)) != 0 ) - { - if ( !shadow_mode_refcounts(d) ) - put_page_type(mfn_to_page(pfn)); - put_page(mfn_to_page(pfn)); - + /* Drop ref to guest_table (from new_guest_cr3(), svm/vmx cr3 handling, + * or sh2_update_paging_modes()) */ + pfn = pagetable_get_pfn(v->arch.guest_table); + if ( pfn != 0 ) + { + if ( shadow2_mode_refcounts(d) ) + put_page(mfn_to_page(pfn)); + else + put_page_and_type(mfn_to_page(pfn)); v->arch.guest_table = pagetable_null(); } - if ( (pfn = pagetable_get_pfn(v->arch.guest_table_user)) != 0 ) - { - if ( !shadow_mode_refcounts(d) ) - put_page_type(mfn_to_page(pfn)); - put_page(mfn_to_page(pfn)); - +#ifdef __x86_64__ + /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */ + pfn = pagetable_get_pfn(v->arch.guest_table_user); + if ( pfn != 0 ) + { + put_page_and_type(mfn_to_page(pfn)); v->arch.guest_table_user = pagetable_null(); } +#endif } if ( d->vcpu[0] && hvm_guest(d->vcpu[0]) ) hvm_relinquish_guest_resources(d); - shadow_mode_disable(d); + /* Tear down shadow mode stuff. */ + shadow2_teardown(d); /* * Relinquish GDT mappings. 
No need for explicit unmapping of the LDT as @@ -964,26 +970,23 @@ void domain_relinquish_resources(struct /* Free page used by xen oprofile buffer */ free_xenoprof_pages(d); - } void arch_dump_domain_info(struct domain *d) { - if ( shadow_mode_enabled(d) ) - { - printk(" shadow mode: "); - if ( shadow_mode_refcounts(d) ) + if ( shadow2_mode_enabled(d) ) + { + printk(" shadow2 mode: "); + if ( d->arch.shadow2_mode & SHM2_enable ) + printk("enabled "); + if ( shadow2_mode_refcounts(d) ) printk("refcounts "); - if ( shadow_mode_write_all(d) ) - printk("write_all "); - if ( shadow_mode_log_dirty(d) ) + if ( shadow2_mode_log_dirty(d) ) printk("log_dirty "); - if ( shadow_mode_translate(d) ) + if ( shadow2_mode_translate(d) ) printk("translate "); - if ( shadow_mode_external(d) ) + if ( shadow2_mode_external(d) ) printk("external "); - if ( shadow_mode_wr_pt_pte(d) ) - printk("wr_pt_pte "); printk("\n"); } } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/domain_build.c Sun Aug 20 11:08:45 2006 -0400 @@ -119,7 +119,7 @@ static void process_dom0_ioports_disable "in dom0_ioports_disable, skipping\n", t); continue; } - + if ( *u == '\0' ) io_to = io_from; else if ( *u == '-' ) @@ -469,7 +469,7 @@ int construct_dom0(struct domain *d, { if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) { - l1start = l1tab = (l1_pgentry_t *)mpt_alloc; + l1start = l1tab = (l1_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; *l2tab = l2e_from_paddr((unsigned long)l1start, L2_PROT); l2tab++; @@ -661,7 +661,7 @@ int construct_dom0(struct domain *d, if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) ) { if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) ) - l3start = l3tab = l4e_to_l3e(*++l4tab); + l3start = l3tab = l4e_to_l3e(*++l4tab); l2start = l2tab = l3e_to_l2e(*l3tab); } l1start = l1tab = l2e_to_l1e(*l2tab); @@ -683,8 +683,11 @@ int construct_dom0(struct domain *d, for ( i = 1; i < opt_dom0_max_vcpus; 
i++ ) (void)alloc_vcpu(d, i, i); - /* Set up monitor table */ - update_pagetables(v); + /* Set up CR3 value for write_ptbase */ + if ( shadow2_mode_enabled(v->domain) ) + shadow2_update_paging_modes(v); + else + update_cr3(v); /* Install the new page tables. */ local_irq_disable(); @@ -796,10 +799,8 @@ int construct_dom0(struct domain *d, new_thread(v, dsi.v_kernentry, vstack_end, vstartinfo_start); if ( opt_dom0_shadow ) - { - shadow_mode_enable(d, SHM_enable); - update_pagetables(v); - } + if ( shadow2_test_enable(d) == 0 ) + shadow2_update_paging_modes(v); if ( supervisor_mode_kernel ) { diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/extable.c --- a/xen/arch/x86/extable.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/extable.c Sun Aug 20 11:08:45 2006 -0400 @@ -41,8 +41,8 @@ void sort_exception_tables(void) static inline unsigned long search_one_table(const struct exception_table_entry *first, - const struct exception_table_entry *last, - unsigned long value) + const struct exception_table_entry *last, + unsigned long value) { const struct exception_table_entry *mid; long diff; diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/genapic/bigsmp.c --- a/xen/arch/x86/genapic/bigsmp.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/genapic/bigsmp.c Sun Aug 20 11:08:45 2006 -0400 @@ -41,7 +41,7 @@ static __init int probe_bigsmp(void) dmi_bigsmp = 1; else dmi_check_system(bigsmp_dmi_table); - return dmi_bigsmp; + return dmi_bigsmp; } struct genapic apic_bigsmp = { diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/genapic/es7000.h --- a/xen/arch/x86/genapic/es7000.h Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/genapic/es7000.h Sun Aug 20 11:08:45 2006 -0400 @@ -71,8 +71,8 @@ struct es7000_mem_info { unsigned char type; unsigned char length; unsigned char resv[6]; - unsigned long long start; - unsigned long long size; + unsigned long long start; + unsigned long long size; }; struct es7000_oem_table { diff -r 96d6f9cfed6e -r 4cffec02b478 
xen/arch/x86/genapic/probe.c --- a/xen/arch/x86/genapic/probe.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/genapic/probe.c Sun Aug 20 11:08:45 2006 -0400 @@ -65,7 +65,7 @@ void __init generic_apic_probe(void) for (i = 0; !changed && apic_probe[i]; i++) { if (apic_probe[i]->probe()) { changed = 1; - genapic = apic_probe[i]; + genapic = apic_probe[i]; } } if (!changed) diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/hvm.c Sun Aug 20 11:08:45 2006 -0400 @@ -30,6 +30,7 @@ #include <xen/hypercall.h> #include <xen/guest_access.h> #include <xen/event.h> +#include <xen/shadow.h> #include <asm/current.h> #include <asm/e820.h> #include <asm/io.h> @@ -42,10 +43,6 @@ #include <asm/spinlock.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> -#include <asm/shadow.h> -#if CONFIG_PAGING_LEVELS >= 3 -#include <asm/shadow_64.h> -#endif #include <public/sched.h> #include <public/hvm/ioreq.h> #include <public/version.h> @@ -61,7 +58,7 @@ static void hvm_zap_mmio_range( static void hvm_zap_mmio_range( struct domain *d, unsigned long pfn, unsigned long nr_pfn) { - unsigned long i, val = INVALID_MFN; + unsigned long i; ASSERT(d == current->domain); @@ -70,7 +67,8 @@ static void hvm_zap_mmio_range( if ( pfn + i >= 0xfffff ) break; - __copy_to_user(&phys_to_machine_mapping[pfn + i], &val, sizeof (val)); + if ( VALID_MFN(gmfn_to_mfn(d, pfn + i)) ) + guest_remove_page(d, pfn + i); } } @@ -262,11 +260,13 @@ void hvm_setup_platform(struct domain* d if ( !hvm_guest(v) || (v->vcpu_id != 0) ) return; +#if 0 /* SHADOW2 does not have this */ if ( shadow_direct_map_init(d) == 0 ) { printk("Can not allocate shadow direct map for HVM domain.\n"); domain_crash_synchronous(); } +#endif hvm_zap_iommu_pages(d); @@ -345,11 +345,44 @@ int cpu_get_interrupt(struct vcpu *v, in return -1; } +#include <asm/hvm/vmx/vmx.h> +void hvm_hlt(unsigned long rflags) +{ + struct vcpu *v = current; + struct 
periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm; + s_time_t next_pit = -1, next_wakeup; + + /* + * Detect machine shutdown. Only do this for vcpu 0, to avoid potentially + * shutting down the domain early. If we halt with interrupts disabled, + * that's a pretty sure sign that we want to shut down. In a real + * processor, NMIs are the only way to break out of this. + */ + if ( (v->vcpu_id == 0) && !(rflags & X86_EFLAGS_IF) ) + { + printk("D%d: HLT with interrupts disabled -- shutting down.\n", + current->domain->domain_id); + domain_shutdown(current->domain, SHUTDOWN_poweroff); + return; + } + + if ( !v->vcpu_id ) + next_pit = get_scheduled(v, pt->irq, pt); + next_wakeup = get_apictime_scheduled(v); + if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) + next_wakeup = next_pit; + if ( next_wakeup != - 1 ) + set_timer(¤t->arch.hvm_vcpu.hlt_timer, next_wakeup); + do_sched_op_compat(SCHEDOP_block, 0); +} + /* * Copy from/to guest virtual. */ int hvm_copy(void *buf, unsigned long vaddr, int size, int dir) { + struct vcpu *v = current; + unsigned long gfn; unsigned long mfn; char *addr; int count; @@ -359,10 +392,9 @@ int hvm_copy(void *buf, unsigned long va if (count > size) count = size; - if (hvm_paging_enabled(current)) - mfn = gva_to_mfn(vaddr); - else - mfn = get_mfn_from_gpfn(vaddr >> PAGE_SHIFT); + gfn = shadow2_gva_to_gfn(v, vaddr); + mfn = mfn_x(sh2_vcpu_gfn_to_mfn(v, gfn)); + if (mfn == INVALID_MFN) return 0; @@ -393,12 +425,12 @@ void hvm_print_line(struct vcpu *v, cons if (*index == HVM_PBUF_SIZE-2 || c == '\n') { if (*index == HVM_PBUF_SIZE-2) - pbuf[(*index)++] = c; + pbuf[(*index)++] = c; pbuf[*index] = '\0'; printk("(GUEST: %u) %s\n", v->domain->domain_id, pbuf); - *index = 0; + *index = 0; } else - pbuf[(*index)++] = c; + pbuf[(*index)++] = c; } typedef unsigned long hvm_hypercall_t( @@ -515,7 +547,7 @@ void hvm_do_hypercall(struct cpu_user_re return; } - if ( current->domain->arch.ops->guest_paging_levels == 
PAGING_L4 ) + if ( current->arch.shadow2->guest_levels == 4 ) { pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi, pregs->rsi, diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/i8259.c --- a/xen/arch/x86/hvm/i8259.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/i8259.c Sun Aug 20 11:08:45 2006 -0400 @@ -57,7 +57,7 @@ static inline void pic_set_irq1(PicState if (level) { if ((s->last_irr & mask) == 0) { s->irr |= mask; - } + } s->last_irr |= mask; } else { s->last_irr &= ~mask; @@ -237,7 +237,7 @@ static void update_shared_irr(struct hvm BUG_ON(!spin_is_locked(&s->lock)); get_sp(current->domain)->sp_global.pic_elcr = - s->pics[0].elcr | ((u16)s->pics[1].elcr << 8); + s->pics[0].elcr | ((u16)s->pics[1].elcr << 8); pl =(uint8_t*)&get_sp(current->domain)->sp_global.pic_last_irr; pe =(uint8_t*)&get_sp(current->domain)->sp_global.pic_elcr; if ( c == &s->pics[0] ) { @@ -550,7 +550,7 @@ static int intercept_elcr_io(ioreq_t *p) spin_lock_irqsave(&s->lock, flags); elcr_ioport_write((void*)&s->pics[p->addr&1], (uint32_t) p->addr, (uint32_t)( data & 0xff)); - get_sp(current->domain)->sp_global.pic_elcr = + get_sp(current->domain)->sp_global.pic_elcr = s->pics[0].elcr | ((u16)s->pics[1].elcr << 8); spin_unlock_irqrestore(&s->lock, flags); } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/intercept.c Sun Aug 20 11:08:45 2006 -0400 @@ -284,7 +284,7 @@ static __inline__ void missed_ticks(stru missed_ticks = NOW() - pt->scheduled; if ( missed_ticks > 0 ) { - missed_ticks = missed_ticks / (s_time_t) pt->period + 1; + missed_ticks = missed_ticks / (s_time_t) pt->period + 1; if ( missed_ticks > 1000 ) { /* TODO: Adjust guest time togther */ pt->pending_intr_nr++; diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/platform.c Sun Aug 20 11:08:45 2006 -0400 @@ 
-21,7 +21,7 @@ #include <xen/config.h> #include <xen/types.h> #include <xen/mm.h> -#include <asm/shadow.h> +#include <xen/shadow.h> #include <xen/domain_page.h> #include <asm/page.h> #include <xen/event.h> @@ -35,9 +35,6 @@ #include <xen/lib.h> #include <xen/sched.h> #include <asm/current.h> -#if CONFIG_PAGING_LEVELS >= 3 -#include <asm/shadow_64.h> -#endif #define DECODE_success 1 #define DECODE_failure 0 @@ -724,7 +721,7 @@ void send_pio_req(struct cpu_user_regs * if (pvalid) { if (hvm_paging_enabled(current)) - p->u.pdata = (void *) gva_to_gpa(value); + p->u.data = shadow2_gva_to_gpa(current, value); else p->u.pdata = (void *) value; /* guest VA == guest PA */ } else @@ -774,7 +771,7 @@ void send_mmio_req( if (pvalid) { if (hvm_paging_enabled(v)) - p->u.pdata = (void *) gva_to_gpa(value); + p->u.data = shadow2_gva_to_gpa(v, value); else p->u.pdata = (void *) value; /* guest VA == guest PA */ } else diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/svm/instrlen.c --- a/xen/arch/x86/hvm/svm/instrlen.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/svm/instrlen.c Sun Aug 20 11:08:45 2006 -0400 @@ -464,7 +464,7 @@ done_prefixes: case 4: insn_fetch(int32_t, 4, _regs.eip, length); break; } goto done; - } + } break; } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/svm/svm.c Sun Aug 20 11:08:45 2006 -0400 @@ -26,9 +26,10 @@ #include <xen/irq.h> #include <xen/softirq.h> #include <xen/hypercall.h> +#include <xen/domain_page.h> #include <asm/current.h> #include <asm/io.h> -#include <asm/shadow.h> +#include <asm/shadow2.h> #include <asm/regs.h> #include <asm/cpufeature.h> #include <asm/processor.h> @@ -43,10 +44,6 @@ #include <asm/hvm/svm/emulate.h> #include <asm/hvm/svm/vmmcall.h> #include <asm/hvm/svm/intr.h> -#include <asm/shadow.h> -#if CONFIG_PAGING_LEVELS >= 3 -#include <asm/shadow_64.h> -#endif #include <public/sched.h> #define SVM_EXTRA_DEBUG @@ -61,7 
+58,7 @@ extern int inst_copy_from_guest(unsigned int inst_len); extern asmlinkage void do_IRQ(struct cpu_user_regs *); extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port, - unsigned long count, int size, long value, int dir, int pvalid); + unsigned long count, int size, long value, int dir, int pvalid); extern int svm_instrlen(struct cpu_user_regs *regs, int mode); extern void svm_dump_inst(unsigned long eip); extern int svm_dbg_on; @@ -69,7 +66,7 @@ void svm_dump_regs(const char *from, str static void svm_relinquish_guest_resources(struct domain *d); static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v, - struct cpu_user_regs *regs); + struct cpu_user_regs *regs); /* va of hardware host save area */ static void *hsa[NR_CPUS] __read_mostly; @@ -110,7 +107,7 @@ void asidpool_init(int core) /* Host ASID is always in use */ per_cpu(asid_pool,core).asid[INITIAL_ASID] = ASID_INUSE; for ( i = 1; i < ASID_MAX; i++ ) - per_cpu(asid_pool,core).asid[i] = ASID_AVAILABLE; + per_cpu(asid_pool,core).asid[i] = ASID_AVAILABLE; } @@ -142,7 +139,7 @@ static int asidpool_fetch_next(struct vm * available. 
*/ int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current, - int oldcore, int newcore ) + int oldcore, int newcore ) { int i; int res = 1; @@ -150,8 +147,8 @@ int asidpool_assign_next( struct vmcb_st spin_lock(&per_cpu(asid_pool,oldcore).asid_lock); if( retire_current && vmcb->guest_asid ) { - per_cpu(asid_pool,oldcore).asid[vmcb->guest_asid & (ASID_MAX-1)] = - ASID_RETIRED; + per_cpu(asid_pool,oldcore).asid[vmcb->guest_asid & (ASID_MAX-1)] = + ASID_RETIRED; } spin_unlock(&per_cpu(asid_pool,oldcore).asid_lock); spin_lock(&per_cpu(asid_pool,newcore).asid_lock); @@ -174,12 +171,12 @@ int asidpool_assign_next( struct vmcb_st void asidpool_retire( struct vmcb_struct *vmcb, int core ) { - spin_lock(&per_cpu(asid_pool,core).asid_lock); - if( vmcb->guest_asid ) { - per_cpu(asid_pool,core).asid[vmcb->guest_asid & (ASID_MAX-1)] = - ASID_RETIRED; - } - spin_unlock(&per_cpu(asid_pool,core).asid_lock); + spin_lock(&per_cpu(asid_pool,core).asid_lock); + if( vmcb->guest_asid ) { + per_cpu(asid_pool,core).asid[vmcb->guest_asid & (ASID_MAX-1)] = + ASID_RETIRED; + } + spin_unlock(&per_cpu(asid_pool,core).asid_lock); } static inline void svm_inject_exception(struct vcpu *v, int trap, @@ -289,26 +286,26 @@ static inline int long_mode_do_msr_read( break; case MSR_STAR: - msr_content = vmcb->star; - break; + msr_content = vmcb->star; + break; case MSR_LSTAR: - msr_content = vmcb->lstar; - break; + msr_content = vmcb->lstar; + break; case MSR_CSTAR: - msr_content = vmcb->cstar; - break; + msr_content = vmcb->cstar; + break; case MSR_SYSCALL_MASK: - msr_content = vmcb->sfmask; - break; + msr_content = vmcb->sfmask; + break; default: return 0; } HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n", - msr_content); + msr_content); regs->eax = msr_content & 0xffffffff; regs->edx = msr_content >> 32; @@ -381,24 +378,24 @@ static inline int long_mode_do_msr_write break; case MSR_SHADOW_GS_BASE: - vmcb->kerngsbase = msr_content; - break; + vmcb->kerngsbase = 
msr_content; + break; case MSR_STAR: - vmcb->star = msr_content; - break; + vmcb->star = msr_content; + break; case MSR_LSTAR: - vmcb->lstar = msr_content; - break; + vmcb->lstar = msr_content; + break; case MSR_CSTAR: - vmcb->cstar = msr_content; - break; + vmcb->cstar = msr_content; + break; case MSR_SYSCALL_MASK: - vmcb->sfmask = msr_content; - break; + vmcb->sfmask = msr_content; + break; default: return 0; @@ -414,7 +411,7 @@ static int svm_realmode(struct vcpu *v) return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE); } -static int svm_instruction_length(struct vcpu *v) +int svm_guest_x86_mode(struct vcpu *v) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode; @@ -423,10 +420,20 @@ static int svm_instruction_length(struct mode = vmcb->cs.attributes.fields.l ? 8 : 4; else mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4; - return svm_instrlen(guest_cpu_user_regs(), mode); -} - -static unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num) + return mode; +} + +int svm_instruction_length(struct vcpu *v) +{ + return svm_instrlen(guest_cpu_user_regs(), svm_guest_x86_mode(v)); +} + +void svm_update_host_cr3(struct vcpu *v) +{ + /* SVM doesn't have a HOST_CR3 equivalent to update. 
*/ +} + +unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num) { switch ( num ) { @@ -436,6 +443,8 @@ static unsigned long svm_get_ctrl_reg(st return v->arch.hvm_svm.cpu_cr2; case 3: return v->arch.hvm_svm.cpu_cr3; + case 4: + return v->arch.hvm_svm.cpu_shadow_cr4; default: BUG(); } @@ -524,8 +533,6 @@ static void svm_init_hypercall_page(stru /* Don't support HYPERVISOR_iret at the moment */ *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */ } - - int svm_dbg_on = 0; @@ -574,9 +581,9 @@ static inline int svm_do_debugout(unsign #if 0 if ((exit_code == 0x4E - || exit_code == VMEXIT_CR0_READ - || exit_code == VMEXIT_CR0_WRITE) - && counter < 200000) + || exit_code == VMEXIT_CR0_READ + || exit_code == VMEXIT_CR0_WRITE) + && counter < 200000) return 0; if ((exit_code == 0x4E) && counter < 500000) @@ -647,6 +654,11 @@ static void svm_load_cpu_guest_regs( svm_load_cpu_user_regs(v, regs); } +int svm_long_mode_enabled(struct vcpu *v) +{ + return SVM_LONG_GUEST(v); +} + static void arch_svm_do_launch(struct vcpu *v) @@ -676,18 +688,18 @@ static void arch_svm_do_launch(struct vc #endif if (v->vcpu_id != 0) { - u16 cs_sel = regs->cs; - /* + u16 cs_sel = regs->cs; + /* * This is the launch of an AP; set state so that we begin executing - * the trampoline code in real-mode. + * the trampoline code in real-mode. 
*/ - svm_do_vmmcall_reset_to_realmode(v, regs); - /* Adjust the state to execute the trampoline code.*/ - v->arch.hvm_svm.vmcb->rip = 0; - v->arch.hvm_svm.vmcb->cs.sel= cs_sel; - v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4); - } - + svm_do_vmmcall_reset_to_realmode(v, regs); + /* Adjust the state to execute the trampoline code.*/ + v->arch.hvm_svm.vmcb->rip = 0; + v->arch.hvm_svm.vmcb->cs.sel= cs_sel; + v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4); + } + reset_stack_and_jump(svm_asm_do_launch); } @@ -726,7 +738,6 @@ static void svm_final_setup_guest(struct static void svm_final_setup_guest(struct vcpu *v) { struct domain *d = v->domain; - struct vcpu *vc; v->arch.schedule_tail = arch_svm_do_launch; v->arch.ctxt_switch_from = svm_ctxt_switch_from; @@ -735,9 +746,12 @@ static void svm_final_setup_guest(struct if ( v != d->vcpu[0] ) return; - /* Initialize monitor page table */ - for_each_vcpu( d, vc ) - vc->arch.monitor_table = pagetable_null(); + if ( !shadow2_mode_external(d) ) + { + DPRINTK("Can't init HVM for dom %u vcpu %u: " + "not in shadow2 external mode\n", d->domain_id, v->vcpu_id); + domain_crash(d); + } /* * Required to do this once per domain @@ -745,13 +759,6 @@ static void svm_final_setup_guest(struct */ memset(&d->shared_info->evtchn_mask[0], 0xff, sizeof(d->shared_info->evtchn_mask)); - - /* - * Put the domain in shadow mode even though we're going to be using - * the shared 1:1 page table initially. It shouldn't hurt - */ - shadow_mode_enable(d, SHM_enable|SHM_refcounts| - SHM_translate|SHM_external|SHM_wr_pt_pte); } @@ -769,7 +776,7 @@ int start_svm(void) u64 phys_hsa; int cpu = smp_processor_id(); - /* Xen does not fill x86_capability words except 0. */ + /* Xen does not fill x86_capability words except 0. 
*/ ecx = cpuid_ecx(0x80000001); boot_cpu_data.x86_capability[5] = ecx; @@ -809,9 +816,13 @@ int start_svm(void) hvm_funcs.realmode = svm_realmode; hvm_funcs.paging_enabled = svm_paging_enabled; + hvm_funcs.long_mode_enabled = svm_long_mode_enabled; + hvm_funcs.guest_x86_mode = svm_guest_x86_mode; hvm_funcs.instruction_length = svm_instruction_length; hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg; + hvm_funcs.update_host_cr3 = svm_update_host_cr3; + hvm_funcs.stts = svm_stts; hvm_funcs.set_tsc_offset = svm_set_tsc_offset; @@ -834,8 +845,7 @@ static void svm_relinquish_guest_resourc continue; destroy_vmcb(&v->arch.hvm_svm); - free_monitor_pagetable(v); - kill_timer(&v->arch.hvm_svm.hlt_timer); + kill_timer(&v->arch.hvm_vcpu.hlt_timer); if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) ) { kill_timer( &(VLAPIC(v)->vlapic_timer) ); @@ -851,8 +861,6 @@ static void svm_relinquish_guest_resourc if ( d->arch.hvm_domain.buffered_io_va ) unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va); - - shadow_direct_map_clean(d); } @@ -863,7 +871,7 @@ static void svm_migrate_timers(struct vc if ( pt->enabled ) { migrate_timer( &pt->timer, v->processor ); - migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor ); + migrate_timer( &v->arch.hvm_vcpu.hlt_timer, v->processor ); } if ( hvm_apic_support(v->domain) && VLAPIC( v )) migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor ); @@ -880,7 +888,7 @@ void arch_svm_do_resume(struct vcpu *v) else { if (svm_dbg_on) printk("VCPU core pinned: %d to %d\n", - v->arch.hvm_svm.launch_core, smp_processor_id() ); + v->arch.hvm_svm.launch_core, smp_processor_id() ); v->arch.hvm_svm.launch_core = smp_processor_id(); svm_migrate_timers( v ); hvm_do_resume( v ); @@ -894,7 +902,6 @@ static int svm_do_page_fault(unsigned lo { struct vcpu *v = current; unsigned long eip; - unsigned long gpa; /* FIXME: PAE */ int result; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; @@ -903,47 +910,11 @@ static int 
svm_do_page_fault(unsigned lo //#if HVM_DEBUG eip = vmcb->rip; HVM_DBG_LOG(DBG_LEVEL_VMMU, - "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx", - va, eip, (unsigned long)regs->error_code); + "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx", + va, eip, (unsigned long)regs->error_code); //#endif - if ( !svm_paging_enabled(v) ) - { - if ( shadow_direct_map_fault(va, regs) ) - return 1; - - handle_mmio(va, va); - return 1; - } - - - gpa = gva_to_gpa(va); - - /* Use 1:1 page table to identify MMIO address space */ - if (mmio_space(gpa)) - { - /* No support for APIC */ - if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000) - { - int inst_len; - inst_len = svm_instruction_length(v); - if (inst_len == -1) - { - printf("%s: INST_LEN - Unable to decode properly\n", __func__); - domain_crash_synchronous(); - } - - __update_guest_eip(vmcb, inst_len); - - return 1; - } - - handle_mmio(va, gpa); - - return 1; - } - - result = shadow_fault(va, regs); + result = shadow2_fault(va, regs); if( result ) { /* Let's make sure that the Guest TLB is flushed */ @@ -967,7 +938,7 @@ static void svm_do_no_device_fault(struc static void svm_do_general_protection_fault(struct vcpu *v, - struct cpu_user_regs *regs) + struct cpu_user_regs *regs) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; unsigned long eip, error_code; @@ -981,7 +952,7 @@ static void svm_do_general_protection_fa printf("Huh? 
We got a GP Fault with an invalid IDTR!\n"); svm_dump_vmcb(__func__, vmcb); svm_dump_regs(__func__, regs); - svm_dump_inst(vmcb->rip); + svm_dump_inst(vmcb->rip); __hvm_bug(regs); } @@ -990,10 +961,10 @@ static void svm_do_general_protection_fa eip, error_code); HVM_DBG_LOG(DBG_LEVEL_1, - "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx", - (unsigned long)regs->eax, (unsigned long)regs->ebx, - (unsigned long)regs->ecx, (unsigned long)regs->edx, - (unsigned long)regs->esi, (unsigned long)regs->edi); + "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx", + (unsigned long)regs->eax, (unsigned long)regs->ebx, + (unsigned long)regs->ecx, (unsigned long)regs->edx, + (unsigned long)regs->esi, (unsigned long)regs->edi); /* Reflect it back into the guest */ svm_inject_exception(v, TRAP_gp_fault, 1, error_code); @@ -1005,7 +976,7 @@ static void svm_do_general_protection_fa #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input, - struct cpu_user_regs *regs) + struct cpu_user_regs *regs) { unsigned int eax, ebx, ecx, edx; unsigned long eip; @@ -1017,37 +988,30 @@ static void svm_vmexit_do_cpuid(struct v eip = vmcb->rip; HVM_DBG_LOG(DBG_LEVEL_1, - "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx," - " (esi) %lx, (edi) %lx", - (unsigned long)regs->eax, (unsigned long)regs->ebx, - (unsigned long)regs->ecx, (unsigned long)regs->edx, - (unsigned long)regs->esi, (unsigned long)regs->edi); + "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx," + " (esi) %lx, (edi) %lx", + (unsigned long)regs->eax, (unsigned long)regs->ebx, + (unsigned long)regs->ecx, (unsigned long)regs->edx, + (unsigned long)regs->esi, (unsigned long)regs->edi); cpuid(input, &eax, &ebx, &ecx, &edx); if (input == 0x00000001) { if ( !hvm_apic_support(v->domain) || - !vlapic_global_enabled((VLAPIC(v))) ) + !vlapic_global_enabled((VLAPIC(v))) ) { /* Since the apic is disabled, avoid any confusion about SMP cpus being available */ 
clear_bit(X86_FEATURE_APIC, &edx); } -#if CONFIG_PAGING_LEVELS < 3 - clear_bit(X86_FEATURE_PAE, &edx); - clear_bit(X86_FEATURE_PSE, &edx); +#if CONFIG_PAGING_LEVELS >= 3 + if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) +#endif + clear_bit(X86_FEATURE_PAE, &edx); clear_bit(X86_FEATURE_PSE36, &edx); -#else - if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 ) - { - if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) - clear_bit(X86_FEATURE_PAE, &edx); - clear_bit(X86_FEATURE_PSE, &edx); - clear_bit(X86_FEATURE_PSE36, &edx); - } -#endif + /* Clear out reserved bits. */ ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED; edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED; @@ -1097,23 +1061,12 @@ static void svm_vmexit_do_cpuid(struct v clear_bit(X86_FEATURE_SYSCALL & 31, &edx); #endif -#if CONFIG_PAGING_LEVELS < 3 - clear_bit(X86_FEATURE_NX & 31, &edx); - clear_bit(X86_FEATURE_PAE, &edx); - clear_bit(X86_FEATURE_PSE, &edx); + +#if CONFIG_PAGING_LEVELS >= 3 + if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) +#endif + clear_bit(X86_FEATURE_PAE, &edx); clear_bit(X86_FEATURE_PSE36, &edx); -#else - if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 ) - { - if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) - { - clear_bit(X86_FEATURE_NX & 31, &edx); - clear_bit(X86_FEATURE_PAE, &edx); - } - clear_bit(X86_FEATURE_PSE, &edx); - clear_bit(X86_FEATURE_PSE36, &edx); - } -#endif /* Make SVM feature invisible to the guest. 
*/ clear_bit(X86_FEATURE_SVME & 31, &ecx); @@ -1138,9 +1091,9 @@ static void svm_vmexit_do_cpuid(struct v regs->edx = (unsigned long)edx; HVM_DBG_LOG(DBG_LEVEL_1, - "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, " - "ebx=%x, ecx=%x, edx=%x", - eip, input, eax, ebx, ecx, edx); + "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, " + "ebx=%x, ecx=%x, edx=%x", + eip, input, eax, ebx, ecx, edx); inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL); ASSERT(inst_len > 0); @@ -1149,7 +1102,7 @@ static void svm_vmexit_do_cpuid(struct v static inline unsigned long *get_reg_p(unsigned int gpreg, - struct cpu_user_regs *regs, struct vmcb_struct *vmcb) + struct cpu_user_regs *regs, struct vmcb_struct *vmcb) { unsigned long *reg_p = NULL; switch (gpreg) @@ -1213,7 +1166,7 @@ static inline unsigned long *get_reg_p(u static inline unsigned long get_reg(unsigned int gpreg, - struct cpu_user_regs *regs, struct vmcb_struct *vmcb) + struct cpu_user_regs *regs, struct vmcb_struct *vmcb) { unsigned long *gp; gp = get_reg_p(gpreg, regs, vmcb); @@ -1222,7 +1175,7 @@ static inline unsigned long get_reg(unsi static inline void set_reg(unsigned int gpreg, unsigned long value, - struct cpu_user_regs *regs, struct vmcb_struct *vmcb) + struct cpu_user_regs *regs, struct vmcb_struct *vmcb) { unsigned long *gp; gp = get_reg_p(gpreg, regs, vmcb); @@ -1231,7 +1184,7 @@ static inline void set_reg(unsigned int static void svm_dr_access (struct vcpu *v, unsigned int reg, unsigned int type, - struct cpu_user_regs *regs) + struct cpu_user_regs *regs) { unsigned long *reg_p = 0; unsigned int gpreg = 0; @@ -1259,7 +1212,7 @@ static void svm_dr_access (struct vcpu * ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2])); HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x", - eip, reg, gpreg); + eip, reg, gpreg); reg_p = get_reg_p(gpreg, regs, vmcb); @@ -1267,7 +1220,7 @@ static void svm_dr_access (struct vcpu * { case TYPE_MOV_TO_DR: inst_len = 
__get_instruction_length(vmcb, INSTR_MOV2DR, buffer); - v->arch.guest_context.debugreg[reg] = *reg_p; + v->arch.guest_context.debugreg[reg] = *reg_p; break; case TYPE_MOV_FROM_DR: inst_len = __get_instruction_length(vmcb, INSTR_MOVDR2, buffer); @@ -1291,7 +1244,7 @@ static void svm_get_prefix_info( memset(inst, 0, MAX_INST_LEN); if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst)) - != MAX_INST_LEN) + != MAX_INST_LEN) { printk("%s: get guest instruction failed\n", __func__); domain_crash_synchronous(); @@ -1555,6 +1508,7 @@ static int svm_set_cr0(unsigned long val unsigned long mfn; int paging_enabled; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + unsigned long old_base_mfn; ASSERT(vmcb); @@ -1577,8 +1531,8 @@ static int svm_set_cr0(unsigned long val { /* The guest CR3 must be pointing to the guest physical. */ if (!VALID_MFN(mfn = - get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) - || !get_page(mfn_to_page(mfn), v->domain)) + get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) + || !get_page(mfn_to_page(mfn), v->domain)) { printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3); domain_crash_synchronous(); /* need to take a clean path */ @@ -1586,8 +1540,8 @@ static int svm_set_cr0(unsigned long val #if defined(__x86_64__) if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state) - && !test_bit(SVM_CPU_STATE_PAE_ENABLED, - &v->arch.hvm_svm.cpu_state)) + && !test_bit(SVM_CPU_STATE_PAE_ENABLED, + &v->arch.hvm_svm.cpu_state)) { HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n"); svm_inject_exception(v, TRAP_gp_fault, 1, 0); @@ -1600,60 +1554,27 @@ static int svm_set_cr0(unsigned long val set_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state); vmcb->efer |= (EFER_LMA | EFER_LME); - if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) ) - { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); /* need to take a clean path */ - } - } - else + } #endif /* __x86_64__ */ - { 
-#if CONFIG_PAGING_LEVELS >= 3 - /* seems it's a 32-bit or 32-bit PAE guest */ - if ( test_bit(SVM_CPU_STATE_PAE_ENABLED, - &v->arch.hvm_svm.cpu_state) ) - { - /* The guest enables PAE first and then it enables PG, it is - * really a PAE guest */ - if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) ) - { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); - } - } - else - { - if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) ) - { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); /* need to take a clean path */ - } - } -#endif - } /* Now arch.guest_table points to machine physical. */ + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_from_pfn(mfn); - update_pagetables(v); + if ( old_base_mfn ) + put_page(mfn_to_page(old_base_mfn)); + shadow2_update_paging_modes(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", - (unsigned long) (mfn << PAGE_SHIFT)); - + (unsigned long) (mfn << PAGE_SHIFT)); + + vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags); - vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table); - - /* arch->shadow_table should hold the next CR3 for shadow */ - HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n", - v->arch.hvm_svm.cpu_cr3, mfn); - - return 1; } if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled ) if ( v->arch.hvm_svm.cpu_cr3 ) { put_page(mfn_to_page(get_mfn_from_gpfn( - v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))); + v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))); v->arch.guest_table = pagetable_null(); } @@ -1667,17 +1588,16 @@ static int svm_set_cr0(unsigned long val svm_inject_exception(v, TRAP_gp_fault, 1, 0); return 0; } - - clear_all_shadow_status( v->domain ); + shadow2_update_paging_modes(v); + vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags); - vmcb->cr3 = 
pagetable_get_paddr(v->domain->arch.phys_table); } else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE ) { /* we should take care of this kind of situation */ - clear_all_shadow_status(v->domain); + shadow2_update_paging_modes(v); + vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags); - vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table); } return 1; @@ -1701,7 +1621,7 @@ static void mov_from_cr(int cr, int gp, value = v->arch.hvm_svm.cpu_shadow_cr0; if (svm_dbg_on) printk("CR0 read =%lx \n", value ); - break; + break; case 2: value = vmcb->cr2; break; @@ -1709,11 +1629,11 @@ static void mov_from_cr(int cr, int gp, value = (unsigned long) v->arch.hvm_svm.cpu_cr3; if (svm_dbg_on) printk("CR3 read =%lx \n", value ); - break; + break; case 4: value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4; if (svm_dbg_on) - printk( "CR4 read=%lx\n", value ); + printk( "CR4 read=%lx\n", value ); break; case 8: #if 0 @@ -1735,7 +1655,7 @@ static void mov_from_cr(int cr, int gp, static inline int svm_pgbit_test(struct vcpu *v) { - return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG; + return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG; } @@ -1786,7 +1706,7 @@ static int mov_to_cr(int gpreg, int cr, mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); if (mfn != pagetable_get_pfn(v->arch.guest_table)) __hvm_bug(regs); - shadow_sync_all(v->domain); + shadow2_update_cr3(v); } else { @@ -1796,8 +1716,8 @@ static int mov_to_cr(int gpreg, int cr, */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value); if (((value >> PAGE_SHIFT) > v->domain->max_pages) - || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT)) - || !get_page(mfn_to_page(mfn), v->domain)) + || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT)) + || !get_page(mfn_to_page(mfn), v->domain)) { printk("Invalid CR3 value=%lx\n", value); domain_crash_synchronous(); /* need to take a clean path */ @@ -1812,14 +1732,10 @@ static int mov_to_cr(int gpreg, int cr, /* * 
arch.shadow_table should now hold the next CR3 for shadow */ -#if CONFIG_PAGING_LEVELS >= 3 - if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 ) - shadow_sync_all(v->domain); -#endif v->arch.hvm_svm.cpu_cr3 = value; - update_pagetables(v); + update_cr3(v); + vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); - vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table); } break; } @@ -1828,7 +1744,7 @@ static int mov_to_cr(int gpreg, int cr, { if (svm_dbg_on) printk( "write cr4=%lx, cr0=%lx\n", - value, v->arch.hvm_svm.cpu_shadow_cr0 ); + value, v->arch.hvm_svm.cpu_shadow_cr0 ); old_cr = v->arch.hvm_svm.cpu_shadow_cr4; if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) ) { @@ -1839,35 +1755,28 @@ static int mov_to_cr(int gpreg, int cr, #if CONFIG_PAGING_LEVELS >= 3 unsigned long mfn, old_base_mfn; - if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) ) - { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); /* need to take a clean path */ - } - if ( !VALID_MFN(mfn = get_mfn_from_gpfn( - v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) || + v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) || !get_page(mfn_to_page(mfn), v->domain) ) { printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3); domain_crash_synchronous(); /* need to take a clean path */ } - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - if ( old_base_mfn ) - put_page(mfn_to_page(old_base_mfn)); - /* * Now arch.guest_table points to machine physical. 
*/ + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_from_pfn(mfn); - update_pagetables(v); + if ( old_base_mfn ) + put_page(mfn_to_page(old_base_mfn)); + shadow2_update_paging_modes(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); - vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table); + vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; /* * arch->shadow_table should hold the next CR3 for shadow @@ -1876,33 +1785,6 @@ static int mov_to_cr(int gpreg, int cr, HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", v->arch.hvm_svm.cpu_cr3, mfn); -#endif - } - else - { - /* The guest is a 64 bit or 32-bit PAE guest. */ -#if CONFIG_PAGING_LEVELS >= 3 - if ( (v->domain->arch.ops != NULL) && - v->domain->arch.ops->guest_paging_levels == PAGING_L2) - { - /* Seems the guest first enables PAE without enabling PG, - * it must enable PG after that, and it is a 32-bit PAE - * guest */ - - if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3)) - { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); - } - } - else - { - if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L4)) - { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); - } - } #endif } } @@ -1926,7 +1808,7 @@ static int mov_to_cr(int gpreg, int cr, if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) { set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags); - shadow_sync_all(v->domain); + shadow2_update_paging_modes(v); } break; } @@ -1944,7 +1826,7 @@ static int mov_to_cr(int gpreg, int cr, static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type, - struct cpu_user_regs *regs) + struct cpu_user_regs *regs) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; int inst_len = 0; @@ -1968,13 +1850,13 @@ static int svm_cr_access(struct vcpu *v, if (type == TYPE_MOV_TO_CR) { - inst_len = __get_instruction_length_from_list(vmcb, list_a, - 
ARR_SIZE(list_a), &buffer[index], &match); + inst_len = __get_instruction_length_from_list( + vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match); } else { - inst_len = __get_instruction_length_from_list(vmcb, list_b, - ARR_SIZE(list_b), &buffer[index], &match); + inst_len = __get_instruction_length_from_list( + vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match); } ASSERT(inst_len > 0); @@ -2016,7 +1898,7 @@ static int svm_cr_access(struct vcpu *v, if (svm_dbg_on) printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg, - inst_len); + inst_len); value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value; @@ -2035,7 +1917,7 @@ static int svm_cr_access(struct vcpu *v, if (svm_dbg_on) printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg, - inst_len); + inst_len); break; default: @@ -2061,9 +1943,9 @@ static inline void svm_do_msr_access( ASSERT(vmcb); HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, " - "exitinfo = %lx", (unsigned long)regs->ecx, - (unsigned long)regs->eax, (unsigned long)regs->edx, - (unsigned long)vmcb->exitinfo1); + "exitinfo = %lx", (unsigned long)regs->ecx, + (unsigned long)regs->eax, (unsigned long)regs->edx, + (unsigned long)vmcb->exitinfo1); /* is it a read? 
*/ if (vmcb->exitinfo1 == 0) @@ -2133,7 +2015,7 @@ static inline void svm_do_msr_access( } } -done: + done: HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: " "ecx=%lx, eax=%lx, edx=%lx", @@ -2144,29 +2026,16 @@ done: } -/* - * Need to use this exit to reschedule - */ static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb) { - struct vcpu *v = current; - struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm; - s_time_t next_pit = -1, next_wakeup; - __update_guest_eip(vmcb, 1); - /* check for interrupt not handled or new interrupt */ - if ( vmcb->vintr.fields.irq || cpu_has_pending_irq(v) ) - return; - - if ( !v->vcpu_id ) - next_pit = get_scheduled(v, pt->irq, pt); - next_wakeup = get_apictime_scheduled(v); - if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) - next_wakeup = next_pit; - if ( next_wakeup != - 1 ) - set_timer(¤t->arch.hvm_svm.hlt_timer, next_wakeup); - do_sched_op_compat(SCHEDOP_block, 0); + /* Check for interrupt not handled or new interrupt. */ + if ( (vmcb->rflags & X86_EFLAGS_IF) && + (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) ) + return; + + hvm_hlt(vmcb->rflags); } @@ -2193,7 +2062,7 @@ static void svm_vmexit_do_invd(struct vm #ifdef XEN_DEBUGGER static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb, - struct cpu_user_regs *regs) + struct cpu_user_regs *regs) { regs->eip = vmcb->rip; regs->esp = vmcb->rsp; @@ -2241,7 +2110,7 @@ void svm_handle_invlpg(const short invlp { printk("svm_handle_invlpg (): Error reading memory %d bytes\n", length); - __hvm_bug(regs); + __hvm_bug(regs); } if (invlpga) @@ -2272,7 +2141,7 @@ void svm_handle_invlpg(const short invlp * the system in either 32- or 64-bit mode. 
*/ g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix, - &opcode[inst_len], &length); + &opcode[inst_len], &length); inst_len += length; __update_guest_eip (vmcb, inst_len); @@ -2280,7 +2149,7 @@ void svm_handle_invlpg(const short invlp /* Overkill, we may not this */ set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags); - shadow_invlpg(v, g_vaddr); + shadow2_invlpg(v, g_vaddr); } @@ -2291,7 +2160,7 @@ void svm_handle_invlpg(const short invlp * returns 0 on success, non-zero otherwise */ static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v, - struct cpu_user_regs *regs) + struct cpu_user_regs *regs) { struct vmcb_struct *vmcb; @@ -2651,10 +2520,10 @@ void walk_shadow_and_guest_pt(unsigned l struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; unsigned long gpa; - gpa = gva_to_gpa( gva ); + gpa = shadow2_gva_to_gpa(current, gva); printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 ); if( !svm_paging_enabled(v) || mmio_space(gpa) ) - return; + return; /* let's dump the guest and shadow page info */ @@ -2675,8 +2544,12 @@ void walk_shadow_and_guest_pt(unsigned l __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], sizeof(gpte) ); printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) ); - __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ], + + BUG(); // need to think about this, and convert usage of + // phys_to_machine_mapping to use pagetable format... 
+ __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ], sizeof(spte) ); + printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte)); } #endif /* SVM_WALK_GUEST_PAGES */ @@ -2708,99 +2581,105 @@ asmlinkage void svm_vmexit_handler(struc } #ifdef SVM_EXTRA_DEBUG -{ + { #if defined(__i386__) -#define rip eip +#define rip eip #endif - static unsigned long intercepts_counter = 0; - - if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) - { - if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2))) - { - printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, " - "gpa=%llx\n", intercepts_counter, - exit_reasons[exit_reason], exit_reason, regs.cs, - (unsigned long long) regs.rip, - (unsigned long long) vmcb->exitinfo1, - (unsigned long long) vmcb->exitinfo2, - (unsigned long long) vmcb->exitintinfo.bytes, - (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) ); - } - else - { - printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n", - intercepts_counter, - exit_reasons[exit_reason], exit_reason, regs.cs, - (unsigned long long) regs.rip, - (unsigned long long) vmcb->exitinfo1, - (unsigned long long) vmcb->exitinfo2, - (unsigned long long) vmcb->exitintinfo.bytes ); - } - } - else if ( svm_dbg_on - && exit_reason != VMEXIT_IOIO - && exit_reason != VMEXIT_INTR) - { - - if (exit_reasons[exit_reason]) - { - printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n", - intercepts_counter, - exit_reasons[exit_reason], exit_reason, regs.cs, - (unsigned long long) regs.rip, - (unsigned long long) vmcb->exitinfo1, - (unsigned long long) vmcb->exitinfo2, - (unsigned long long) vmcb->exitintinfo.bytes); + static unsigned long intercepts_counter = 0; + + if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) + { + if (svm_paging_enabled(v) && + !mmio_space(shadow2_gva_to_gpa(current, vmcb->exitinfo2))) + { + printk("I%08ld,ExC=%s(%d),IP=%x:%llx," + "I1=%llx,I2=%llx,INT=%llx, " + "gpa=%llx\n", intercepts_counter, + 
exit_reasons[exit_reason], exit_reason, regs.cs, + (unsigned long long) regs.rip, + (unsigned long long) vmcb->exitinfo1, + (unsigned long long) vmcb->exitinfo2, + (unsigned long long) vmcb->exitintinfo.bytes, + (unsigned long long) shadow2_gva_to_gpa(current, vmcb->exitinfo2)); + } + else + { + printk("I%08ld,ExC=%s(%d),IP=%x:%llx," + "I1=%llx,I2=%llx,INT=%llx\n", + intercepts_counter, + exit_reasons[exit_reason], exit_reason, regs.cs, + (unsigned long long) regs.rip, + (unsigned long long) vmcb->exitinfo1, + (unsigned long long) vmcb->exitinfo2, + (unsigned long long) vmcb->exitintinfo.bytes ); + } } - else - { - printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n", - intercepts_counter, exit_reason, exit_reason, regs.cs, - (unsigned long long) regs.rip, - (unsigned long long) vmcb->exitinfo1, - (unsigned long long) vmcb->exitinfo2, - (unsigned long long) vmcb->exitintinfo.bytes); - } - } + else if ( svm_dbg_on + && exit_reason != VMEXIT_IOIO + && exit_reason != VMEXIT_INTR) + { + + if (exit_reasons[exit_reason]) + { + printk("I%08ld,ExC=%s(%d),IP=%x:%llx," + "I1=%llx,I2=%llx,INT=%llx\n", + intercepts_counter, + exit_reasons[exit_reason], exit_reason, regs.cs, + (unsigned long long) regs.rip, + (unsigned long long) vmcb->exitinfo1, + (unsigned long long) vmcb->exitinfo2, + (unsigned long long) vmcb->exitintinfo.bytes); + } + else + { + printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx," + "I1=%llx,I2=%llx,INT=%llx\n", + intercepts_counter, exit_reason, exit_reason, regs.cs, + (unsigned long long) regs.rip, + (unsigned long long) vmcb->exitinfo1, + (unsigned long long) vmcb->exitinfo2, + (unsigned long long) vmcb->exitintinfo.bytes); + } + } #ifdef SVM_WALK_GUEST_PAGES - if( exit_reason == VMEXIT_EXCEPTION_PF - && ( ( vmcb->exitinfo2 == vmcb->rip ) - || vmcb->exitintinfo.bytes) ) - { - if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2))) - walk_shadow_and_guest_pt( vmcb->exitinfo2 ); - } + if( exit_reason == VMEXIT_EXCEPTION_PF + && ( ( 
vmcb->exitinfo2 == vmcb->rip ) + || vmcb->exitintinfo.bytes) ) + { + if ( svm_paging_enabled(v) && + !mmio_space(gva_to_gpa(vmcb->exitinfo2)) ) + walk_shadow_and_guest_pt(vmcb->exitinfo2); + } #endif - intercepts_counter++; + intercepts_counter++; #if 0 - if (svm_dbg_on) - do_debug = svm_do_debugout(exit_reason); + if (svm_dbg_on) + do_debug = svm_do_debugout(exit_reason); #endif - if (do_debug) - { - printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, " - "shadow_table = 0x%08x\n", - __func__, - (int) v->arch.guest_table.pfn, - (int) v->arch.monitor_table.pfn, - (int) v->arch.shadow_table.pfn); - - svm_dump_vmcb(__func__, vmcb); - svm_dump_regs(__func__, ®s); - svm_dump_inst(svm_rip2pointer(vmcb)); - } + if (do_debug) + { + printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, " + "shadow_table = 0x%08x\n", + __func__, + (int) v->arch.guest_table.pfn, + (int) v->arch.monitor_table.pfn, + (int) v->arch.shadow_table.pfn); + + svm_dump_vmcb(__func__, vmcb); + svm_dump_regs(__func__, ®s); + svm_dump_inst(svm_rip2pointer(vmcb)); + } #if defined(__i386__) -#undef rip +#undef rip #endif -} + } #endif /* SVM_EXTRA_DEBUG */ @@ -2811,7 +2690,7 @@ asmlinkage void svm_vmexit_handler(struc if (do_debug) { printk("eip = %lx, exit_reason = %d (0x%x)\n", - eip, exit_reason, exit_reason); + eip, exit_reason, exit_reason); } #endif /* SVM_EXTRA_DEBUG */ @@ -2880,10 +2759,10 @@ asmlinkage void svm_vmexit_handler(struc va = vmcb->exitinfo2; regs.error_code = vmcb->exitinfo1; HVM_DBG_LOG(DBG_LEVEL_VMMU, - "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx", - (unsigned long)regs.eax, (unsigned long)regs.ebx, - (unsigned long)regs.ecx, (unsigned long)regs.edx, - (unsigned long)regs.esi, (unsigned long)regs.edi); + "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx", + (unsigned long)regs.eax, (unsigned long)regs.ebx, + (unsigned long)regs.ecx, (unsigned long)regs.edx, + (unsigned long)regs.esi, (unsigned long)regs.edi); if (!(error = svm_do_page_fault(va, ®s))) { @@ 
-2893,7 +2772,7 @@ asmlinkage void svm_vmexit_handler(struc v->arch.hvm_svm.cpu_cr2 = va; vmcb->cr2 = va; TRACE_3D(TRC_VMX_INT, v->domain->domain_id, - VMEXIT_EXCEPTION_PF, va); + VMEXIT_EXCEPTION_PF, va); } break; } @@ -3048,8 +2927,8 @@ asmlinkage void svm_vmexit_handler(struc default: printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %llx, " "exitinfo2 = %llx\n", exit_reason, - (unsigned long long)vmcb->exitinfo1, - (unsigned long long)vmcb->exitinfo2); + (unsigned long long)vmcb->exitinfo1, + (unsigned long long)vmcb->exitinfo2); __hvm_bug(®s); /* should not happen */ break; } @@ -3064,10 +2943,10 @@ asmlinkage void svm_vmexit_handler(struc if (do_debug) { printk("vmexit_handler():- guest_table = 0x%08x, " - "monitor_table = 0x%08x, shadow_table = 0x%08x\n", - (int)v->arch.guest_table.pfn, - (int)v->arch.monitor_table.pfn, - (int)v->arch.shadow_table.pfn); + "monitor_table = 0x%08x, shadow_table = 0x%08x\n", + (int)v->arch.guest_table.pfn, + (int)v->arch.monitor_table.pfn, + (int)v->arch.shadow_table.pfn); printk("svm_vmexit_handler: Returning\n"); } #endif @@ -3088,15 +2967,17 @@ asmlinkage void svm_asid(void) struct vcpu *v = current; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - /* - * if need to assign new asid, or if switching cores, - * retire asid for the old core, and assign a new asid to the current core. - */ + /* + * if need to assign new asid, or if switching cores, + * retire asid for the old core, and assign a new asid to the current core. 
+ */ if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) || - ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) { + ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) { /* recycle asid */ - if ( !asidpool_assign_next( vmcb, 1, - v->arch.hvm_svm.asid_core, v->arch.hvm_svm.launch_core )) { + if ( !asidpool_assign_next(vmcb, 1, + v->arch.hvm_svm.asid_core, + v->arch.hvm_svm.launch_core) ) + { /* If we get here, we have a major problem */ domain_crash_synchronous(); } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/svm/vmcb.c Sun Aug 20 11:08:45 2006 -0400 @@ -309,10 +309,6 @@ int construct_vmcb(struct arch_svm_struc goto err_out; } - /* update the HSA for the current Core */ -#if 0 - set_hsa_to_guest( arch_svm ); -#endif arch_svm->vmcb_pa = (u64) virt_to_maddr(arch_svm->vmcb); if ((error = construct_vmcb_controls(arch_svm))) @@ -364,8 +360,7 @@ void svm_do_launch(struct vcpu *v) if (hvm_apic_support(v->domain)) vlapic_init(v); - init_timer(&v->arch.hvm_svm.hlt_timer, - hlt_timer_fn, v, v->processor); + init_timer(&v->arch.hvm_vcpu.hlt_timer, hlt_timer_fn, v, v->processor); vmcb->ldtr.sel = 0; vmcb->ldtr.base = 0; @@ -385,8 +380,8 @@ void svm_do_launch(struct vcpu *v) printk("%s: phys_table = %lx\n", __func__, pt); } - /* At launch we always use the phys_table */ - vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table); + /* Set cr3 from hw_cr3 even when guest-visible paging is not enabled */ + vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; if (svm_dbg_on) { @@ -401,7 +396,7 @@ void svm_do_launch(struct vcpu *v) v->arch.hvm_svm.saved_irq_vector = -1; hvm_set_guest_time(v, 0); - + if (svm_dbg_on) svm_dump_vmcb(__func__, vmcb); @@ -432,24 +427,24 @@ void svm_dump_vmcb(const char *from, str vmcb->general1_intercepts, vmcb->general2_intercepts); printf("iopm_base_pa = %016llx msrpm_base_pa = 0x%016llx tsc_offset = " "0x%016llx\n", - 
(unsigned long long) vmcb->iopm_base_pa, - (unsigned long long) vmcb->msrpm_base_pa, - (unsigned long long) vmcb->tsc_offset); + (unsigned long long) vmcb->iopm_base_pa, + (unsigned long long) vmcb->msrpm_base_pa, + (unsigned long long) vmcb->tsc_offset); printf("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = " "0x%016llx\n", vmcb->tlb_control, - (unsigned long long) vmcb->vintr.bytes, - (unsigned long long) vmcb->interrupt_shadow); + (unsigned long long) vmcb->vintr.bytes, + (unsigned long long) vmcb->interrupt_shadow); printf("exitcode = 0x%016llx exitintinfo = 0x%016llx\n", (unsigned long long) vmcb->exitcode, - (unsigned long long) vmcb->exitintinfo.bytes); + (unsigned long long) vmcb->exitintinfo.bytes); printf("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx \n", (unsigned long long) vmcb->exitinfo1, - (unsigned long long) vmcb->exitinfo2); + (unsigned long long) vmcb->exitinfo2); printf("np_enable = 0x%016llx guest_asid = 0x%03x\n", (unsigned long long) vmcb->np_enable, vmcb->guest_asid); printf("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n", vmcb->cpl, (unsigned long long) vmcb->efer, - (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar); + (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar); printf("CR0 = 0x%016llx CR2 = 0x%016llx\n", (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2); printf("CR3 = 0x%016llx CR4 = 0x%016llx\n", @@ -465,7 +460,7 @@ void svm_dump_vmcb(const char *from, str (unsigned long long) vmcb->sfmask); printf("KernGSBase = 0x%016llx PAT = 0x%016llx \n", (unsigned long long) vmcb->kerngsbase, - (unsigned long long) vmcb->g_pat); + (unsigned long long) vmcb->g_pat); /* print out all the selectors */ svm_dump_sel("CS", &vmcb->cs); diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/svm/x86_32/exits.S --- a/xen/arch/x86/hvm/svm/x86_32/exits.S Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/svm/x86_32/exits.S Sun Aug 20 11:08:45 2006 -0400 @@ -56,8 +56,8 @@ * 
domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers. */ -#define HVM_MONITOR_EFLAGS 0x202 /* IF on */ -#define NR_SKIPPED_REGS 6 /* See the above explanation */ +#define HVM_MONITOR_EFLAGS 0x202 /* IF on */ +#define NR_SKIPPED_REGS 6 /* See the above explanation */ #define HVM_SAVE_ALL_NOSEGREGS \ pushl $HVM_MONITOR_EFLAGS; \ popf; \ @@ -95,8 +95,8 @@ ENTRY(svm_asm_do_launch) movl VCPU_svm_vmcb(%ebx), %ecx movl 24(%esp), %eax movl %eax, VMCB_rax(%ecx) - movl VCPU_processor(%ebx), %eax - movl root_vmcb_pa(,%eax,8), %eax + movl VCPU_processor(%ebx), %eax + movl root_vmcb_pa(,%eax,8), %eax VMSAVE movl VCPU_svm_vmcb_pa(%ebx), %eax @@ -120,8 +120,8 @@ ENTRY(svm_asm_do_launch) GET_CURRENT(%eax) - movl VCPU_processor(%eax), %eax - movl root_vmcb_pa(,%eax,8), %eax + movl VCPU_processor(%eax), %eax + movl root_vmcb_pa(,%eax,8), %eax VMLOAD HVM_SAVE_ALL_NOSEGREGS diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/svm/x86_64/exits.S --- a/xen/arch/x86/hvm/svm/x86_64/exits.S Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/svm/x86_64/exits.S Sun Aug 20 11:08:45 2006 -0400 @@ -52,8 +52,8 @@ * (2/1) u32 entry_vector; * (1/1) u32 error_code; */ -#define HVM_MONITOR_RFLAGS 0x202 /* IF on */ -#define NR_SKIPPED_REGS 6 /* See the above explanation */ +#define HVM_MONITOR_RFLAGS 0x202 /* IF on */ +#define NR_SKIPPED_REGS 6 /* See the above explanation */ #define HVM_SAVE_ALL_NOSEGREGS \ pushq $HVM_MONITOR_RFLAGS; \ popfq; \ @@ -105,10 +105,9 @@ ENTRY(svm_asm_do_launch) movq VCPU_svm_vmcb(%rbx), %rcx movq UREGS_rax(%rsp), %rax movq %rax, VMCB_rax(%rcx) - leaq root_vmcb_pa(%rip), %rax - movl VCPU_processor(%rbx), %ecx - shll $3, %ecx - addq %rcx, %rax + leaq root_vmcb_pa(%rip), %rax + movl VCPU_processor(%rbx), %ecx + movq (%rax,%rcx,8), %rax VMSAVE movq VCPU_svm_vmcb_pa(%rbx), %rax @@ -139,10 +138,9 @@ ENTRY(svm_asm_do_launch) HVM_SAVE_ALL_NOSEGREGS GET_CURRENT(%rbx) - movl VCPU_processor(%rbx), %ecx - leaq root_vmcb_pa(%rip), %rax - shll $3, %ecx - addq 
%rcx, %rax + leaq root_vmcb_pa(%rip), %rax + movl VCPU_processor(%rbx), %ecx + movq (%rax,%rcx,8), %rax VMLOAD STGI @@ -151,13 +149,13 @@ ENTRY(svm_asm_do_launch) ENTRY(svm_asm_do_resume) svm_test_all_events: - GET_CURRENT(%rbx) + GET_CURRENT(%rbx) movq %rbx, %rdi call hvm_do_resume /*test_all_events:*/ cli # tests must not race interrupts /*test_softirqs:*/ - movl VCPU_processor(%rbx),%eax + movl VCPU_processor(%rbx),%eax shl $IRQSTAT_shift, %rax leaq irq_stat(%rip), %rdx testl $~0, (%rdx, %rax, 1) diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vioapic.c --- a/xen/arch/x86/hvm/vioapic.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/vioapic.c Sun Aug 20 11:08:45 2006 -0400 @@ -44,7 +44,7 @@ #define IRQ0_SPECIAL_ROUTING 1 #if defined(__ia64__) -#define opt_hvm_debug_level opt_vmx_debug_level +#define opt_hvm_debug_level opt_vmx_debug_level #endif static void ioapic_enable(hvm_vioapic_t *s, uint8_t enable) @@ -264,7 +264,7 @@ static void hvm_vioapic_reset(hvm_vioapi for (i = 0; i < IOAPIC_NUM_PINS; i++) { s->redirtbl[i].RedirForm.mask = 0x1; - hvm_vioapic_update_imr(s, i); + hvm_vioapic_update_imr(s, i); } } @@ -364,7 +364,7 @@ static uint32_t ioapic_get_delivery_bitm if (dest_mode == 0) { /* Physical mode */ for (i = 0; i < s->lapic_count; i++) { - if (VLAPIC_ID(s->lapic_info[i]) == dest) { + if (VLAPIC_ID(s->lapic_info[i]) == dest) { mask = 1 << i; break; } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/vlapic.c Sun Aug 20 11:08:45 2006 -0400 @@ -21,7 +21,8 @@ #include <xen/types.h> #include <xen/mm.h> #include <xen/xmalloc.h> -#include <asm/shadow.h> +#include <xen/shadow.h> +#include <xen/domain_page.h> #include <asm/page.h> #include <xen/event.h> #include <xen/trace.h> diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Sun Aug 20 
11:08:45 2006 -0400 @@ -34,12 +34,8 @@ #include <asm/flushtlb.h> #include <xen/event.h> #include <xen/kernel.h> -#include <asm/shadow.h> #include <xen/keyhandler.h> - -#if CONFIG_PAGING_LEVELS >= 3 -#include <asm/shadow_64.h> -#endif +#include <asm/shadow2.h> static int vmcs_size; static int vmcs_order; @@ -238,7 +234,7 @@ static void vmx_set_host_env(struct vcpu static void vmx_do_launch(struct vcpu *v) { -/* Update CR3, GDT, LDT, TR */ +/* Update CR3, CR0, CR4, GDT, LDT, TR */ unsigned int error = 0; unsigned long cr0, cr4; @@ -267,7 +263,7 @@ static void vmx_do_launch(struct vcpu *v vlapic_init(v); vmx_set_host_env(v); - init_timer(&v->arch.hvm_vmx.hlt_timer, hlt_timer_fn, v, v->processor); + init_timer(&v->arch.hvm_vcpu.hlt_timer, hlt_timer_fn, v, v->processor); error |= __vmwrite(GUEST_LDTR_SELECTOR, 0); error |= __vmwrite(GUEST_LDTR_BASE, 0); @@ -276,8 +272,11 @@ static void vmx_do_launch(struct vcpu *v error |= __vmwrite(GUEST_TR_BASE, 0); error |= __vmwrite(GUEST_TR_LIMIT, 0xff); - __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table)); - __vmwrite(HOST_CR3, pagetable_get_paddr(v->arch.monitor_table)); + shadow2_update_paging_modes(v); + printk("%s(): GUEST_CR3<=%08lx, HOST_CR3<=%08lx\n", + __func__, v->arch.hvm_vcpu.hw_cr3, v->arch.cr3); + __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3); + __vmwrite(HOST_CR3, v->arch.cr3); v->arch.schedule_tail = arch_vmx_do_resume; diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/vmx/vmx.c Sun Aug 20 11:08:45 2006 -0400 @@ -26,9 +26,9 @@ #include <xen/softirq.h> #include <xen/domain_page.h> #include <xen/hypercall.h> +#include <xen/perfc.h> #include <asm/current.h> #include <asm/io.h> -#include <asm/shadow.h> #include <asm/regs.h> #include <asm/cpufeature.h> #include <asm/processor.h> @@ -40,10 +40,7 @@ #include <asm/hvm/vmx/vmx.h> #include <asm/hvm/vmx/vmcs.h> #include <asm/hvm/vmx/cpu.h> -#include 
<asm/shadow.h> -#if CONFIG_PAGING_LEVELS >= 3 -#include <asm/shadow_64.h> -#endif +#include <asm/shadow2.h> #include <public/sched.h> #include <public/hvm/ioreq.h> #include <asm/hvm/vpic.h> @@ -69,11 +66,16 @@ static int vmx_initialize_guest_resource if ( v->vcpu_id != 0 ) return 1; + if ( !shadow2_mode_external(d) ) + { + DPRINTK("Can't init HVM for dom %u vcpu %u: " + "not in shadow2 external mode\n", + d->domain_id, v->vcpu_id); + domain_crash(d); + } + for_each_vcpu ( d, vc ) { - /* Initialize monitor page table */ - vc->arch.monitor_table = pagetable_null(); - memset(&vc->arch.hvm_vmx, 0, sizeof(struct arch_vmx_struct)); if ( (rc = vmx_create_vmcs(vc)) != 0 ) @@ -107,6 +109,7 @@ static int vmx_initialize_guest_resource vc->arch.hvm_vmx.io_bitmap_a = io_bitmap_a; vc->arch.hvm_vmx.io_bitmap_b = io_bitmap_b; + } /* @@ -116,11 +119,6 @@ static int vmx_initialize_guest_resource memset(&d->shared_info->evtchn_mask[0], 0xff, sizeof(d->shared_info->evtchn_mask)); - /* Put the domain in shadow mode even though we're going to be using - * the shared 1:1 page table initially. 
It shouldn't hurt */ - shadow_mode_enable( - d, SHM_enable|SHM_refcounts|SHM_translate|SHM_external|SHM_wr_pt_pte); - return 1; } @@ -133,8 +131,7 @@ static void vmx_relinquish_guest_resourc vmx_destroy_vmcs(v); if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) continue; - free_monitor_pagetable(v); - kill_timer(&v->arch.hvm_vmx.hlt_timer); + kill_timer(&v->arch.hvm_vcpu.hlt_timer); if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) ) { kill_timer(&VLAPIC(v)->vlapic_timer); @@ -149,12 +146,10 @@ static void vmx_relinquish_guest_resourc if ( d->arch.hvm_domain.shared_page_va ) unmap_domain_page_global( - (void *)d->arch.hvm_domain.shared_page_va); + (void *)d->arch.hvm_domain.shared_page_va); if ( d->arch.hvm_domain.buffered_io_va ) unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va); - - shadow_direct_map_clean(d); } #ifdef __x86_64__ @@ -496,7 +491,7 @@ void vmx_migrate_timers(struct vcpu *v) if ( pt->enabled ) { migrate_timer(&pt->timer, v->processor); - migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor); + migrate_timer(&v->arch.hvm_vcpu.hlt_timer, v->processor); } if ( hvm_apic_support(v->domain) && VLAPIC(v)) migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor); @@ -595,20 +590,12 @@ static void vmx_load_cpu_guest_regs(stru vmx_vmcs_exit(v); } -static int vmx_realmode(struct vcpu *v) -{ - unsigned long rflags; - - __vmread(GUEST_RFLAGS, &rflags); - return rflags & X86_EFLAGS_VM; -} - static int vmx_instruction_length(struct vcpu *v) { unsigned long inst_len; if (__vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len)) - return 0; + return 0; return inst_len; } @@ -622,6 +609,8 @@ static unsigned long vmx_get_ctrl_reg(st return v->arch.hvm_vmx.cpu_cr2; case 3: return v->arch.hvm_vmx.cpu_cr3; + case 4: + return v->arch.hvm_vmx.cpu_shadow_cr4; default: BUG(); } @@ -753,8 +742,12 @@ static void vmx_setup_hvm_funcs(void) hvm_funcs.realmode = vmx_realmode; hvm_funcs.paging_enabled = vmx_paging_enabled; + hvm_funcs.long_mode_enabled = 
vmx_long_mode_enabled; + hvm_funcs.guest_x86_mode = vmx_guest_x86_mode; hvm_funcs.instruction_length = vmx_instruction_length; hvm_funcs.get_guest_ctrl_reg = vmx_get_ctrl_reg; + + hvm_funcs.update_host_cr3 = vmx_update_host_cr3; hvm_funcs.stts = vmx_stts; hvm_funcs.set_tsc_offset = vmx_set_tsc_offset; @@ -855,53 +848,25 @@ static void inline __update_guest_eip(un __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); } - static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs) { - unsigned long gpa; /* FIXME: PAE */ int result; #if 0 /* keep for debugging */ { - unsigned long eip; - + unsigned long eip, cs; + + __vmread(GUEST_CS_BASE, &cs); __vmread(GUEST_RIP, &eip); HVM_DBG_LOG(DBG_LEVEL_VMMU, - "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx", - va, eip, (unsigned long)regs->error_code); + "vmx_do_page_fault = 0x%lx, cs_base=%lx, " + "eip = %lx, error_code = %lx\n", + va, cs, eip, (unsigned long)regs->error_code); } #endif - if ( !vmx_paging_enabled(current) ) - { - /* construct 1-to-1 direct mapping */ - if ( shadow_direct_map_fault(va, regs) ) - return 1; - - handle_mmio(va, va); - TRACE_VMEXIT (2,2); - return 1; - } - gpa = gva_to_gpa(va); - - /* Use 1:1 page table to identify MMIO address space */ - if ( mmio_space(gpa) ){ - struct vcpu *v = current; - /* No support for APIC */ - if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000) { - u32 inst_len; - __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len)); - __update_guest_eip(inst_len); - return 1; - } - TRACE_VMEXIT (2,2); - /* in the case of MMIO, we are more interested in gpa than in va */ - TRACE_VMEXIT (4,gpa); - handle_mmio(va, gpa); - return 1; - } - - result = shadow_fault(va, regs); + result = shadow2_fault(va, regs); + TRACE_VMEXIT (2,result); #if 0 if ( !result ) @@ -972,23 +937,11 @@ static void vmx_vmexit_do_cpuid(struct c clear_bit(X86_FEATURE_APIC, &edx); } -#if CONFIG_PAGING_LEVELS < 3 - edx &= ~(bitmaskof(X86_FEATURE_PAE) | - bitmaskof(X86_FEATURE_PSE) | - 
bitmaskof(X86_FEATURE_PSE36)); -#else - if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 ) - { - if ( v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) - clear_bit(X86_FEATURE_PSE36, &edx); - else - { - clear_bit(X86_FEATURE_PAE, &edx); - clear_bit(X86_FEATURE_PSE, &edx); - clear_bit(X86_FEATURE_PSE36, &edx); - } - } +#if CONFIG_PAGING_LEVELS >= 3 + if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] ) #endif + clear_bit(X86_FEATURE_PAE, &edx); + clear_bit(X86_FEATURE_PSE36, &edx); ebx &= NUM_THREADS_RESET_MASK; @@ -1086,7 +1039,7 @@ static void vmx_vmexit_do_invlpg(unsigne * We do the safest things first, then try to update the shadow * copying from guest */ - shadow_invlpg(v, va); + shadow2_invlpg(v, va); } @@ -1141,7 +1094,7 @@ static int check_for_null_selector(unsig extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port, unsigned long count, int size, long value, - int dir, int pvalid); + int dir, int pvalid); static void vmx_io_instruction(unsigned long exit_qualification, unsigned long inst_len) @@ -1307,11 +1260,8 @@ vmx_world_restore(struct vcpu *v, struct error |= __vmwrite(CR0_READ_SHADOW, c->cr0); - if (!vmx_paging_enabled(v)) { - HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table"); - __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table)); + if (!vmx_paging_enabled(v)) goto skip_cr3; - } if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) { /* @@ -1325,7 +1275,6 @@ vmx_world_restore(struct vcpu *v, struct domain_crash_synchronous(); return 0; } - shadow_sync_all(v->domain); } else { /* * If different, make a shadow. Check if the PDBR is valid @@ -1348,12 +1297,16 @@ vmx_world_restore(struct vcpu *v, struct * arch.shadow_table should now hold the next CR3 for shadow */ v->arch.hvm_vmx.cpu_cr3 = c->cr3; - update_pagetables(v); + } + + skip_cr3: + + shadow2_update_paging_modes(v); + if (!vmx_paging_enabled(v)) + HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. 
use phys table"); + else HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3); - __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table)); - } - - skip_cr3: + __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3); error |= __vmread(CR4_READ_SHADOW, &old_cr4); error |= __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK)); @@ -1485,6 +1438,7 @@ static int vmx_set_cr0(unsigned long val int paging_enabled; unsigned long vm_entry_value; unsigned long old_cr0; + unsigned long old_base_mfn; /* * CR0: We don't want to lose PE and PG. @@ -1514,7 +1468,8 @@ static int vmx_set_cr0(unsigned long val v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) || !get_page(mfn_to_page(mfn), v->domain) ) { - printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3); + printk("Invalid CR3 value = %lx (mfn=%lx)\n", + v->arch.hvm_vmx.cpu_cr3, mfn); domain_crash_synchronous(); /* need to take a clean path */ } @@ -1539,51 +1494,22 @@ static int vmx_set_cr0(unsigned long val __vmread(VM_ENTRY_CONTROLS, &vm_entry_value); vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE; __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); - - if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L4) ) - { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); /* need to take a clean path */ - } - } - else -#endif /* __x86_64__ */ - { -#if CONFIG_PAGING_LEVELS >= 3 - /* seems it's a 32-bit or 32-bit PAE guest */ - - if ( test_bit(VMX_CPU_STATE_PAE_ENABLED, - &v->arch.hvm_vmx.cpu_state) ) - { - /* The guest enables PAE first and then it enables PG, it is - * really a PAE guest */ - if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) ) - { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); - } - } - else - { - if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) ) - { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); /* need to take a clean path */ - } - } + } #endif - } /* * Now arch.guest_table points to machine physical. 
*/ + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_from_pfn(mfn); - update_pagetables(v); + if (old_base_mfn) + put_page(mfn_to_page(old_base_mfn)); + shadow2_update_paging_modes(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); - __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table)); + __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3); /* * arch->shadow_table should hold the next CR3 for shadow */ @@ -1625,7 +1551,6 @@ static int vmx_set_cr0(unsigned long val } } - clear_all_shadow_status(v->domain); if ( vmx_assist(v, VMX_ASSIST_INVOKE) ) { set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.hvm_vmx.cpu_state); __vmread(GUEST_RIP, &eip); @@ -1651,9 +1576,8 @@ static int vmx_set_cr0(unsigned long val } else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE ) { - /* we should take care of this kind of situation */ - clear_all_shadow_status(v->domain); - __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table)); + __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3); + shadow2_update_paging_modes(v); } return 1; @@ -1738,7 +1662,7 @@ static int mov_to_cr(int gp, int cr, str mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); if (mfn != pagetable_get_pfn(v->arch.guest_table)) __hvm_bug(regs); - shadow_sync_all(v->domain); + shadow2_update_cr3(v); } else { /* * If different, make a shadow. 
Check if the PDBR is valid @@ -1759,16 +1683,11 @@ static int mov_to_cr(int gp, int cr, str /* * arch.shadow_table should now hold the next CR3 for shadow */ -#if CONFIG_PAGING_LEVELS >= 3 - if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 ) - shadow_sync_all(v->domain); -#endif - v->arch.hvm_vmx.cpu_cr3 = value; - update_pagetables(v); + update_cr3(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); - __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table)); + __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3); } break; } @@ -1785,12 +1704,6 @@ static int mov_to_cr(int gp, int cr, str /* The guest is a 32-bit PAE guest. */ #if CONFIG_PAGING_LEVELS >= 3 unsigned long mfn, old_base_mfn; - - if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) ) - { - printk("Unsupported guest paging levels\n"); - domain_crash_synchronous(); /* need to take a clean path */ - } if ( !VALID_MFN(mfn = get_mfn_from_gpfn( v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) || @@ -1800,21 +1713,20 @@ static int mov_to_cr(int gp, int cr, str domain_crash_synchronous(); /* need to take a clean path */ } - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - if ( old_base_mfn ) - put_page(mfn_to_page(old_base_mfn)); /* * Now arch.guest_table points to machine physical. */ + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_from_pfn(mfn); - update_pagetables(v); + if ( old_base_mfn ) + put_page(mfn_to_page(old_base_mfn)); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); - __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table)); + __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3); /* * arch->shadow_table should hold the next CR3 for shadow @@ -1822,27 +1734,6 @@ static int mov_to_cr(int gp, int cr, str HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", v->arch.hvm_vmx.cpu_cr3, mfn); -#endif - } - else - { - /* The guest is a 64 bit or 32-bit PAE guest. 
*/ -#if CONFIG_PAGING_LEVELS >= 3 - if ( (v->domain->arch.ops != NULL) && - v->domain->arch.ops->guest_paging_levels == PAGING_L2) - { - /* Seems the guest first enables PAE without enabling PG, - * it must enable PG after that, and it is a 32-bit PAE - * guest */ - - if ( !shadow_set_guest_paging_levels(v->domain, - PAGING_L3) ) - { - printk("Unsupported guest paging levels\n"); - /* need to take a clean path */ - domain_crash_synchronous(); - } - } #endif } } @@ -1864,8 +1755,7 @@ static int mov_to_cr(int gp, int cr, str * all TLB entries except global entries. */ if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) - shadow_sync_all(v->domain); - + shadow2_update_paging_modes(v); break; } default: @@ -2049,23 +1939,11 @@ static inline void vmx_do_msr_write(stru (unsigned long)regs->edx); } -/* - * Need to use this exit to reschedule - */ void vmx_vmexit_do_hlt(void) { - struct vcpu *v=current; - struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm); - s_time_t next_pit=-1,next_wakeup; - - if ( !v->vcpu_id ) - next_pit = get_scheduled(v, pt->irq, pt); - next_wakeup = get_apictime_scheduled(v); - if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) - next_wakeup = next_pit; - if ( next_wakeup != - 1 ) - set_timer(¤t->arch.hvm_vmx.hlt_timer, next_wakeup); - do_sched_op_compat(SCHEDOP_block, 0); + unsigned long rflags; + __vmread(GUEST_RFLAGS, &rflags); + hvm_hlt(rflags); } static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs) @@ -2395,8 +2273,6 @@ asmlinkage void vmx_vmexit_handler(struc case EXIT_REASON_DR_ACCESS: __vmread(EXIT_QUALIFICATION, &exit_qualification); vmx_dr_access(exit_qualification, ®s); - __get_instruction_length(inst_len); - __update_guest_eip(inst_len); break; case EXIT_REASON_IO_INSTRUCTION: __vmread(EXIT_QUALIFICATION, &exit_qualification); diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vmx/x86_32/exits.S --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Sun Aug 20 11:07:52 
2006 -0400 +++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S Sun Aug 20 11:08:45 2006 -0400 @@ -55,7 +55,7 @@ * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers. */ -#define NR_SKIPPED_REGS 6 /* See the above explanation */ +#define NR_SKIPPED_REGS 6 /* See the above explanation */ #define HVM_SAVE_ALL_NOSEGREGS \ subl $(NR_SKIPPED_REGS*4), %esp; \ movl $0, 0xc(%esp); /* XXX why do we need to force eflags==0 ?? */ \ diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Sun Aug 20 11:08:45 2006 -0400 @@ -51,7 +51,7 @@ * (2/1) u32 entry_vector; * (1/1) u32 error_code; */ -#define NR_SKIPPED_REGS 6 /* See the above explanation */ +#define NR_SKIPPED_REGS 6 /* See the above explanation */ #define HVM_SAVE_ALL_NOSEGREGS \ subq $(NR_SKIPPED_REGS*8), %rsp; \ pushq %rdi; \ diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/i387.c --- a/xen/arch/x86/i387.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/i387.c Sun Aug 20 11:08:45 2006 -0400 @@ -5,7 +5,7 @@ * * Pentium III FXSR, SSE support * General FPU state handling cleanups - * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000 + * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000 */ #include <xen/config.h> diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/io_apic.c Sun Aug 20 11:08:45 2006 -0400 @@ -639,7 +639,7 @@ static int pin_2_irq(int idx, int apic, } default: { - printk(KERN_ERR "unknown bus type %d.\n",bus); + printk(KERN_ERR "unknown bus type %d.\n",bus); irq = 0; break; } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/microcode.c --- a/xen/arch/x86/microcode.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/microcode.c Sun Aug 20 11:08:45 2006 -0400 @@ -152,7 +152,7 @@ static void collect_cpu_info (void *unus unsigned int val[2]; uci->sig = uci->pf = uci->rev = uci->cksum = 0; - 
uci->err = MC_NOTFOUND; + uci->err = MC_NOTFOUND; uci->mc = NULL; if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -460,7 +460,7 @@ int microcode_update(void *buf, unsigned int ret; if (len < DEFAULT_UCODE_TOTALSIZE) { - printk(KERN_ERR "microcode: not enough data\n"); + printk(KERN_ERR "microcode: not enough data\n"); return -EINVAL; } diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/mm.c Sun Aug 20 11:08:45 2006 -0400 @@ -137,7 +137,7 @@ static void free_l1_table(struct page_in static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long, unsigned long type); -static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t); +static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t, unsigned long gl1mfn); /* Used to defer flushing of memory structures. */ struct percpu_mm_info { @@ -234,6 +234,21 @@ void arch_init_memory(void) subarch_init_memory(); } +int memory_is_conventional_ram(paddr_t p) +{ + int i; + + for ( i = 0; i < e820.nr_map; i++ ) + { + if ( (e820.map[i].type == E820_RAM) && + (e820.map[i].addr <= p) && + (e820.map[i].size > p) ) + return 1; + } + + return 0; +} + void share_xen_page_with_guest( struct page_info *page, struct domain *d, int readonly) { @@ -274,9 +289,9 @@ void share_xen_page_with_privileged_gues #else /* * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths. - * We cannot safely shadow the idle page table, nor shadow-mode page tables + * We cannot safely shadow the idle page table, nor shadow (v1) page tables * (detected by lack of an owning domain). As required for correctness, we - * always shadow PDPTs aboive 4GB. + * always shadow PDPTs above 4GB. 
*/ #define l3tab_needs_shadow(mfn) \ (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \ @@ -297,17 +312,21 @@ static int __init cache_pae_fixmap_addre } __initcall(cache_pae_fixmap_address); -static void __write_ptbase(unsigned long mfn) +static DEFINE_PER_CPU(u32, make_cr3_timestamp); + +void make_cr3(struct vcpu *v, unsigned long mfn) +/* Takes the MFN of a PAE l3 table, copies the contents to below 4GB if + * necessary, and sets v->arch.cr3 to the value to load in CR3. */ { l3_pgentry_t *highmem_l3tab, *lowmem_l3tab; - struct pae_l3_cache *cache = ¤t->arch.pae_l3_cache; + struct pae_l3_cache *cache = &v->arch.pae_l3_cache; unsigned int cpu = smp_processor_id(); - /* Fast path 1: does this mfn need a shadow at all? */ + /* Fast path: does this mfn need a shadow at all? */ if ( !l3tab_needs_shadow(mfn) ) { - write_cr3(mfn << PAGE_SHIFT); - /* Cache is no longer in use or valid (/after/ write to %cr3). */ + v->arch.cr3 = mfn << PAGE_SHIFT; + /* Cache is no longer in use or valid */ cache->high_mfn = 0; return; } @@ -315,13 +334,6 @@ static void __write_ptbase(unsigned long /* Caching logic is not interrupt safe. */ ASSERT(!in_irq()); - /* Fast path 2: is this mfn already cached? */ - if ( cache->high_mfn == mfn ) - { - write_cr3(__pa(cache->table[cache->inuse_idx])); - return; - } - /* Protects against pae_flush_pgd(). */ spin_lock(&cache->lock); @@ -330,29 +342,33 @@ static void __write_ptbase(unsigned long /* Map the guest L3 table and copy to the chosen low-memory cache. */ *(fix_pae_highmem_pl1e - cpu) = l1e_from_pfn(mfn, __PAGE_HYPERVISOR); + /* First check the previous high mapping can't be in the TLB. + * (i.e. have we loaded CR3 since we last did this?) 
*/ + if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) ) + local_flush_tlb_one(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu)); highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu); lowmem_l3tab = cache->table[cache->inuse_idx]; memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0])); *(fix_pae_highmem_pl1e - cpu) = l1e_empty(); - - /* Install the low-memory L3 table in CR3. */ - write_cr3(__pa(lowmem_l3tab)); + this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time); + + v->arch.cr3 = __pa(lowmem_l3tab); spin_unlock(&cache->lock); } #else /* !CONFIG_X86_PAE */ -static void __write_ptbase(unsigned long mfn) -{ - write_cr3(mfn << PAGE_SHIFT); +void make_cr3(struct vcpu *v, unsigned long mfn) +{ + v->arch.cr3 = mfn << PAGE_SHIFT; } #endif /* !CONFIG_X86_PAE */ void write_ptbase(struct vcpu *v) { - __write_ptbase(pagetable_get_pfn(v->arch.monitor_table)); + write_cr3(v->arch.cr3); } void invalidate_shadow_ldt(struct vcpu *v) @@ -423,8 +439,6 @@ int map_ldt_shadow_page(unsigned int off BUG_ON(unlikely(in_irq())); - shadow_sync_va(v, gva); - TOGGLE_MODE(); __copy_from_user(&l1e, &linear_pg_table[l1_linear_offset(gva)], sizeof(l1e)); @@ -440,12 +454,12 @@ int map_ldt_shadow_page(unsigned int off res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page); - if ( !res && unlikely(shadow_mode_refcounts(d)) ) - { - shadow_lock(d); - shadow_remove_all_write_access(d, gmfn, mfn); + if ( !res && unlikely(shadow2_mode_refcounts(d)) ) + { + shadow2_lock(d); + shadow2_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0); res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page); - shadow_unlock(d); + shadow2_unlock(d); } if ( unlikely(!res) ) @@ -513,7 +527,7 @@ get_linear_pagetable( struct page_info *page; unsigned long pfn; - ASSERT( !shadow_mode_refcounts(d) ); + ASSERT( !shadow2_mode_refcounts(d) ); if ( (root_get_flags(re) & _PAGE_RW) ) { @@ -576,7 +590,8 @@ get_page_from_l1e( if ( !iomem_access_permitted(d, mfn, mfn) ) { - MEM_LOG("Non-privileged 
attempt to map I/O space %08lx", mfn); + MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx", + d->domain_id, mfn); return 0; } @@ -587,9 +602,14 @@ get_page_from_l1e( d = dom_io; } - okay = ((l1e_get_flags(l1e) & _PAGE_RW) ? - get_page_and_type(page, d, PGT_writable_page) : - get_page(page, d)); + /* Foreign mappings into guests in shadow2 external mode don't + * contribute to writeable mapping refcounts. (This allows the + * qemu-dm helper process in dom0 to map the domain's memory without + * messing up the count of "real" writable mappings.) */ + okay = (((l1e_get_flags(l1e) & _PAGE_RW) && + !(unlikely(shadow2_mode_external(d) && (d != current->domain)))) + ? get_page_and_type(page, d, PGT_writable_page) + : get_page(page, d)); if ( !okay ) { MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte @@ -609,8 +629,6 @@ get_page_from_l2e( struct domain *d, unsigned long vaddr) { int rc; - - ASSERT(!shadow_mode_refcounts(d)); if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) return 1; @@ -641,8 +659,6 @@ get_page_from_l3e( { int rc; - ASSERT(!shadow_mode_refcounts(d)); - if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) return 1; @@ -668,8 +684,6 @@ get_page_from_l4e( struct domain *d, unsigned long vaddr) { int rc; - - ASSERT( !shadow_mode_refcounts(d) ); if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ) return 1; @@ -727,7 +741,10 @@ void put_page_from_l1e(l1_pgentry_t l1e, domain_crash(d); } - if ( l1e_get_flags(l1e) & _PAGE_RW ) + /* Remember we didn't take a type-count of foreign writable mappings + * to shadow2 external domains */ + if ( (l1e_get_flags(l1e) & _PAGE_RW) && + !(unlikely((e != d) && shadow2_mode_external(e))) ) { put_page_and_type(page); } @@ -784,7 +801,7 @@ static int alloc_l1_table(struct page_in l1_pgentry_t *pl1e; int i; - ASSERT(!shadow_mode_refcounts(d)); + ASSERT(!shadow2_mode_refcounts(d)); pl1e = map_domain_page(pfn); @@ -832,6 +849,8 @@ static int create_pae_xen_mappings(l3_pg * 2. Cannot appear in another page table's L3: * a. 
alloc_l3_table() calls this function and this check will fail * b. mod_l3_entry() disallows updates to slot 3 in an existing table + * + * XXX -- this needs revisiting for shadow2_mode_refcount()==true... */ page = l3e_get_page(l3e3); BUG_ON(page->u.inuse.type_info & PGT_pinned); @@ -955,11 +974,7 @@ static int alloc_l2_table(struct page_in l2_pgentry_t *pl2e; int i; - /* See the code in shadow_promote() to understand why this is here. */ - if ( (PGT_base_page_table == PGT_l2_page_table) && - unlikely(shadow_mode_refcounts(d)) ) - return 1; - ASSERT(!shadow_mode_refcounts(d)); + ASSERT(!shadow2_mode_refcounts(d)); pl2e = map_domain_page(pfn); @@ -1009,11 +1024,7 @@ static int alloc_l3_table(struct page_in l3_pgentry_t *pl3e; int i; - /* See the code in shadow_promote() to understand why this is here. */ - if ( (PGT_base_page_table == PGT_l3_page_table) && - shadow_mode_refcounts(d) ) - return 1; - ASSERT(!shadow_mode_refcounts(d)); + ASSERT(!shadow2_mode_refcounts(d)); #ifdef CONFIG_X86_PAE /* @@ -1072,11 +1083,7 @@ static int alloc_l4_table(struct page_in unsigned long vaddr; int i; - /* See the code in shadow_promote() to understand why this is here. 
*/ - if ( (PGT_base_page_table == PGT_l4_page_table) && - shadow_mode_refcounts(d) ) - return 1; - ASSERT(!shadow_mode_refcounts(d)); + ASSERT(!shadow2_mode_refcounts(d)); for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ ) { @@ -1183,51 +1190,61 @@ static void free_l4_table(struct page_in static inline int update_l1e(l1_pgentry_t *pl1e, l1_pgentry_t ol1e, - l1_pgentry_t nl1e) -{ + l1_pgentry_t nl1e, + unsigned long gl1mfn, + struct vcpu *v) +{ + int rv = 1; + if ( unlikely(shadow2_mode_enabled(v->domain)) ) + shadow2_lock(v->domain); #ifndef PTE_UPDATE_WITH_CMPXCHG - return !__copy_to_user(pl1e, &nl1e, sizeof(nl1e)); + rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e))); #else - intpte_t o = l1e_get_intpte(ol1e); - intpte_t n = l1e_get_intpte(nl1e); - - for ( ; ; ) - { - if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ) - { - MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte - ": saw %" PRIpte, - l1e_get_intpte(ol1e), - l1e_get_intpte(nl1e), - o); - return 0; - } - - if ( o == l1e_get_intpte(ol1e) ) - break; - - /* Allowed to change in Accessed/Dirty flags only. */ - BUG_ON((o ^ l1e_get_intpte(ol1e)) & - ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); - ol1e = l1e_from_intpte(o); - } - - return 1; + { + intpte_t o = l1e_get_intpte(ol1e); + intpte_t n = l1e_get_intpte(nl1e); + + for ( ; ; ) + { + if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ) + { + MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte + ": saw %" PRIpte, + l1e_get_intpte(ol1e), + l1e_get_intpte(nl1e), + o); + rv = 0; + break; + } + + if ( o == l1e_get_intpte(ol1e) ) + break; + + /* Allowed to change in Accessed/Dirty flags only. */ + BUG_ON((o ^ l1e_get_intpte(ol1e)) & + ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); + ol1e = l1e_from_intpte(o); + } + } #endif + if ( unlikely(shadow2_mode_enabled(v->domain)) ) + { + shadow2_validate_guest_entry(v, _mfn(gl1mfn), pl1e); + shadow2_unlock(v->domain); + } + return rv; } /* Update the L1 entry at pl1e to new value nl1e. 
*/ -static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e) +static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, + unsigned long gl1mfn) { l1_pgentry_t ol1e; struct domain *d = current->domain; if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ) return 0; - - if ( unlikely(shadow_mode_refcounts(d)) ) - return update_l1e(pl1e, ol1e, nl1e); if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) { @@ -1239,13 +1256,13 @@ static int mod_l1_entry(l1_pgentry_t *pl } /* Fast path for identical mapping, r/w and presence. */ - if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT)) - return update_l1e(pl1e, ol1e, nl1e); + if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) + return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current); if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) ) return 0; - if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) ) + if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) { put_page_from_l1e(nl1e, d); return 0; @@ -1253,7 +1270,7 @@ static int mod_l1_entry(l1_pgentry_t *pl } else { - if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) ) + if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) return 0; } @@ -1262,9 +1279,9 @@ static int mod_l1_entry(l1_pgentry_t *pl } #ifndef PTE_UPDATE_WITH_CMPXCHG -#define UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; }) +#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; }) #else -#define UPDATE_ENTRY(_t,_p,_o,_n) ({ \ +#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ \ for ( ; ; ) \ { \ intpte_t __o = cmpxchg((intpte_t *)(_p), \ @@ -1279,6 +1296,18 @@ static int mod_l1_entry(l1_pgentry_t *pl } \ 1; }) #endif +#define UPDATE_ENTRY(_t,_p,_o,_n,_m) ({ \ + int rv; \ + if ( unlikely(shadow2_mode_enabled(current->domain)) ) \ + shadow2_lock(current->domain); \ + rv = _UPDATE_ENTRY(_t, _p, _o, _n); \ + if ( unlikely(shadow2_mode_enabled(current->domain)) ) \ + { \ + shadow2_validate_guest_entry(current, _mfn(_m), (_p)); \ + shadow2_unlock(current->domain); \ + } \ + rv; \ +}) /* 
Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */ static int mod_l2_entry(l2_pgentry_t *pl2e, @@ -1309,19 +1338,19 @@ static int mod_l2_entry(l2_pgentry_t *pl /* Fast path for identical mapping and presence. */ if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e); + return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn); if ( unlikely(!l1_backptr(&vaddr, pgentry_ptr_to_slot(pl2e), type)) || unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) ) + if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) { put_page_from_l2e(nl2e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) ) + else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) { return 0; } @@ -1329,7 +1358,6 @@ static int mod_l2_entry(l2_pgentry_t *pl put_page_from_l2e(ol2e, pfn); return 1; } - #if CONFIG_PAGING_LEVELS >= 3 @@ -1356,7 +1384,7 @@ static int mod_l3_entry(l3_pgentry_t *pl */ if ( pgentry_ptr_to_slot(pl3e) >= 3 ) return 0; -#endif +#endif if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) ) return 0; @@ -1372,26 +1400,26 @@ static int mod_l3_entry(l3_pgentry_t *pl /* Fast path for identical mapping and presence. 
*/ if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e); + return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn); #if CONFIG_PAGING_LEVELS >= 4 if ( unlikely(!l2_backptr(&vaddr, pgentry_ptr_to_slot(pl3e), type)) || unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) ) - return 0; + return 0; #else vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t)) << L3_PAGETABLE_SHIFT; if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) ) return 0; -#endif - - if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) ) +#endif + + if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) { put_page_from_l3e(nl3e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) ) + else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) { return 0; } @@ -1438,19 +1466,19 @@ static int mod_l4_entry(l4_pgentry_t *pl /* Fast path for identical mapping and presence. */ if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT)) - return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e); + return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn); if ( unlikely(!l3_backptr(&vaddr, pgentry_ptr_to_slot(pl4e), type)) || unlikely(!get_page_from_l4e(nl4e, pfn, current->domain, vaddr)) ) return 0; - if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) ) + if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) { put_page_from_l4e(nl4e, pfn); return 0; } } - else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) ) + else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) { return 0; } @@ -1506,18 +1534,21 @@ void free_page_type(struct page_info *pa */ this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS; - if ( unlikely(shadow_mode_enabled(owner)) ) + if ( unlikely(shadow2_mode_enabled(owner) + && !shadow2_lock_is_acquired(owner)) ) { /* Raw page tables are rewritten during save/restore. 
*/ - if ( !shadow_mode_translate(owner) ) + if ( !shadow2_mode_translate(owner) ) mark_dirty(owner, page_to_mfn(page)); - if ( shadow_mode_refcounts(owner) ) + if ( shadow2_mode_refcounts(owner) ) return; gmfn = mfn_to_gmfn(owner, page_to_mfn(page)); ASSERT(VALID_M2P(gmfn)); - remove_shadow(owner, gmfn, type & PGT_type_mask); + shadow2_lock(owner); + shadow2_remove_all_shadows(owner->vcpu[0], _mfn(gmfn)); + shadow2_unlock(owner); } } @@ -1573,9 +1604,6 @@ void put_page_type(struct page_info *pag if ( unlikely((nx & PGT_count_mask) == 0) ) { - /* Record TLB information for flush later. Races are harmless. */ - page->tlbflush_timestamp = tlbflush_current_time(); - if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && likely(nx & PGT_validated) ) { @@ -1593,6 +1621,9 @@ void put_page_type(struct page_info *pag x &= ~PGT_validated; nx &= ~PGT_validated; } + + /* Record TLB information for flush later. */ + page->tlbflush_timestamp = tlbflush_current_time(); } else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == (PGT_pinned|PGT_l1_page_table|1)) ) @@ -1682,7 +1713,7 @@ int get_page_type(struct page_info *page #endif /* Fixme: add code to propagate va_unknown to subtables. */ if ( ((type & PGT_type_mask) >= PGT_l2_page_table) && - !shadow_mode_refcounts(page_get_owner(page)) ) + !shadow2_mode_refcounts(page_get_owner(page)) ) return 0; /* This table is possibly mapped at multiple locations. 
*/ nx &= ~PGT_va_mask; @@ -1729,7 +1760,10 @@ int new_guest_cr3(unsigned long mfn) int okay; unsigned long old_base_mfn; - if ( shadow_mode_refcounts(d) ) + if ( hvm_guest(v) && !hvm_paging_enabled(v) ) + domain_crash_synchronous(); + + if ( shadow2_mode_refcounts(d) ) { okay = get_page_from_pagenr(mfn, d); if ( unlikely(!okay) ) @@ -1747,7 +1781,7 @@ int new_guest_cr3(unsigned long mfn) MEM_LOG("New baseptr %lx: slow path via idle pagetables", mfn); old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_null(); - update_pagetables(v); + update_cr3(v); write_cr3(__pa(idle_pg_table)); if ( old_base_mfn != 0 ) put_page_and_type(mfn_to_page(old_base_mfn)); @@ -1769,30 +1803,20 @@ int new_guest_cr3(unsigned long mfn) invalidate_shadow_ldt(v); old_base_mfn = pagetable_get_pfn(v->arch.guest_table); + v->arch.guest_table = pagetable_from_pfn(mfn); - update_pagetables(v); /* update shadow_table and monitor_table */ + update_cr3(v); /* update shadow_table and cr3 fields of vcpu struct */ write_ptbase(v); if ( likely(old_base_mfn != 0) ) { - if ( shadow_mode_refcounts(d) ) + if ( shadow2_mode_refcounts(d) ) put_page(mfn_to_page(old_base_mfn)); else put_page_and_type(mfn_to_page(old_base_mfn)); } - /* CR3 also holds a ref to its shadow... 
*/ - if ( shadow_mode_enabled(d) ) - { - if ( v->arch.monitor_shadow_ref ) - put_shadow_ref(v->arch.monitor_shadow_ref); - v->arch.monitor_shadow_ref = - pagetable_get_pfn(v->arch.monitor_table); - ASSERT(!page_get_owner(mfn_to_page(v->arch.monitor_shadow_ref))); - get_shadow_ref(v->arch.monitor_shadow_ref); - } - return 1; } @@ -1807,8 +1831,6 @@ static void process_deferred_ops(void) if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) ) { - if ( shadow_mode_enabled(d) ) - shadow_sync_all(d); if ( deferred_ops & DOP_FLUSH_ALL_TLBS ) flush_tlb_mask(d->domain_dirty_cpumask); else @@ -1974,7 +1996,7 @@ int do_mmuext_op( type = PGT_root_page_table; pin_page: - if ( shadow_mode_refcounts(FOREIGNDOM) ) + if ( shadow2_mode_refcounts(FOREIGNDOM) ) break; okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM); @@ -1996,7 +2018,7 @@ int do_mmuext_op( break; case MMUEXT_UNPIN_TABLE: - if ( shadow_mode_refcounts(d) ) + if ( shadow2_mode_refcounts(d) ) break; if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) ) @@ -2009,6 +2031,12 @@ int do_mmuext_op( { put_page_and_type(page); put_page(page); + if ( shadow2_mode_enabled(d) ) + { + shadow2_lock(d); + shadow2_remove_all_shadows(v, _mfn(mfn)); + shadow2_unlock(d); + } } else { @@ -2050,9 +2078,9 @@ int do_mmuext_op( break; case MMUEXT_INVLPG_LOCAL: - if ( shadow_mode_enabled(d) ) - shadow_invlpg(v, op.arg1.linear_addr); - local_flush_tlb_one(op.arg1.linear_addr); + if ( !shadow2_mode_enabled(d) + || shadow2_invlpg(v, op.arg1.linear_addr) != 0 ) + local_flush_tlb_one(op.arg1.linear_addr); break; case MMUEXT_TLB_FLUSH_MULTI: @@ -2098,7 +2126,7 @@ int do_mmuext_op( unsigned long ptr = op.arg1.linear_addr; unsigned long ents = op.arg2.nr_ents; - if ( shadow_mode_external(d) ) + if ( shadow2_mode_external(d) ) { MEM_LOG("ignoring SET_LDT hypercall from external " "domain %u", d->domain_id); @@ -2171,9 +2199,6 @@ int do_mmu_update( LOCK_BIGLOCK(d); - if ( unlikely(shadow_mode_enabled(d)) ) - check_pagetable(v, "pre-mmu"); 
/* debug */ - if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { count &= ~MMU_UPDATE_PREEMPTED; @@ -2190,7 +2215,7 @@ int do_mmu_update( goto out; } - perfc_incrc(calls_to_mmu_update); + perfc_incrc(calls_to_mmu_update); perfc_addc(num_page_updates, count); perfc_incr_histo(bpt_updates, count, PT_UPDATES); @@ -2248,7 +2273,12 @@ int do_mmu_update( case PGT_l3_page_table: case PGT_l4_page_table: { - ASSERT(!shadow_mode_refcounts(d)); + if ( shadow2_mode_refcounts(d) ) + { + DPRINTK("mmu update on shadow-refcounted domain!"); + break; + } + if ( unlikely(!get_page_type( page, type_info & (PGT_type_mask|PGT_va_mask))) ) goto not_a_pt; @@ -2258,10 +2288,7 @@ int do_mmu_update( case PGT_l1_page_table: { l1_pgentry_t l1e = l1e_from_intpte(req.val); - okay = mod_l1_entry(va, l1e); - if ( okay && unlikely(shadow_mode_enabled(d)) ) - shadow_l1_normal_pt_update( - d, req.ptr, l1e, &sh_mapcache); + okay = mod_l1_entry(va, l1e, mfn); } break; case PGT_l2_page_table: @@ -2269,9 +2296,6 @@ int do_mmu_update( l2_pgentry_t l2e = l2e_from_intpte(req.val); okay = mod_l2_entry( (l2_pgentry_t *)va, l2e, mfn, type_info); - if ( okay && unlikely(shadow_mode_enabled(d)) ) - shadow_l2_normal_pt_update( - d, req.ptr, l2e, &sh_mapcache); } break; #if CONFIG_PAGING_LEVELS >= 3 @@ -2279,9 +2303,6 @@ int do_mmu_update( { l3_pgentry_t l3e = l3e_from_intpte(req.val); okay = mod_l3_entry(va, l3e, mfn, type_info); - if ( okay && unlikely(shadow_mode_enabled(d)) ) - shadow_l3_normal_pt_update( - d, req.ptr, l3e, &sh_mapcache); } break; #endif @@ -2290,9 +2311,6 @@ int do_mmu_update( { l4_pgentry_t l4e = l4e_from_intpte(req.val); okay = mod_l4_entry(va, l4e, mfn, type_info); - if ( okay && unlikely(shadow_mode_enabled(d)) ) - shadow_l4_normal_pt_update( - d, req.ptr, l4e, &sh_mapcache); } break; #endif @@ -2308,19 +2326,17 @@ int do_mmu_update( if ( unlikely(!get_page_type(page, PGT_writable_page)) ) break; - if ( shadow_mode_enabled(d) ) - { - shadow_lock(d); - __mark_dirty(d, mfn); - if ( 
page_is_page_table(page) && !page_out_of_sync(page) ) - shadow_mark_mfn_out_of_sync(v, gmfn, mfn); - } + if ( unlikely(shadow2_mode_enabled(d)) ) + shadow2_lock(d); *(intpte_t *)va = req.val; okay = 1; - if ( shadow_mode_enabled(d) ) - shadow_unlock(d); + if ( unlikely(shadow2_mode_enabled(d)) ) + { + shadow2_validate_guest_entry(v, _mfn(mfn), va); + shadow2_unlock(d); + } put_page_type(page); } @@ -2333,12 +2349,6 @@ int do_mmu_update( break; case MMU_MACHPHYS_UPDATE: - - if ( shadow_mode_translate(FOREIGNDOM) ) - { - MEM_LOG("can't mutate m2p table of translate mode guest"); - break; - } mfn = req.ptr >> PAGE_SHIFT; gpfn = req.val; @@ -2349,9 +2359,13 @@ int do_mmu_update( break; } - set_gpfn_from_mfn(mfn, gpfn); + if ( shadow2_mode_translate(FOREIGNDOM) ) + shadow2_guest_physmap_add_page(FOREIGNDOM, gpfn, mfn); + else + set_gpfn_from_mfn(mfn, gpfn); okay = 1; + // Mark the new gfn dirty... mark_dirty(FOREIGNDOM, mfn); put_page(mfn_to_page(mfn)); @@ -2381,9 +2395,6 @@ int do_mmu_update( done += i; if ( unlikely(!guest_handle_is_null(pdone)) ) copy_to_guest(pdone, &done, 1); - - if ( unlikely(shadow_mode_enabled(d)) ) - check_pagetable(v, "post-mmu"); /* debug */ UNLOCK_BIGLOCK(d); return rc; @@ -2402,7 +2413,6 @@ static int create_grant_pte_mapping( struct domain *d = v->domain; ASSERT(spin_is_locked(&d->big_lock)); - ASSERT(!shadow_mode_refcounts(d)); gmfn = pte_addr >> PAGE_SHIFT; mfn = gmfn_to_mfn(d, gmfn); @@ -2418,7 +2428,7 @@ static int create_grant_pte_mapping( page = mfn_to_page(mfn); type_info = page->u.inuse.type_info; - if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) || + if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) || !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) ) { MEM_LOG("Grant map attempted to update a non-L1 page"); @@ -2427,28 +2437,22 @@ static int create_grant_pte_mapping( } ol1e = *(l1_pgentry_t *)va; - if ( !update_l1e(va, ol1e, _nl1e) ) + if ( !update_l1e(va, ol1e, _nl1e, mfn, v) ) { put_page_type(page); 
rc = GNTST_general_error; goto failed; } - put_page_from_l1e(ol1e, d); - - if ( unlikely(shadow_mode_enabled(d)) ) - { - struct domain_mmap_cache sh_mapcache; - domain_mmap_cache_init(&sh_mapcache); - shadow_l1_normal_pt_update(d, pte_addr, _nl1e, &sh_mapcache); - domain_mmap_cache_destroy(&sh_mapcache); - } + if ( !shadow2_mode_refcounts(d) ) + put_page_from_l1e(ol1e, d); put_page_type(page); failed: unmap_domain_page(va); put_page(page); + return rc; } @@ -2462,8 +2466,6 @@ static int destroy_grant_pte_mapping( u32 type_info; l1_pgentry_t ol1e; - ASSERT(!shadow_mode_refcounts(d)); - gmfn = addr >> PAGE_SHIFT; mfn = gmfn_to_mfn(d, gmfn); @@ -2504,7 +2506,9 @@ static int destroy_grant_pte_mapping( } /* Delete pagetable entry. */ - if ( unlikely(!update_l1e((l1_pgentry_t *)va, ol1e, l1e_empty())) ) + if ( unlikely(!update_l1e( + (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn, + d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) ) { MEM_LOG("Cannot delete PTE entry at %p", va); put_page_type(page); @@ -2512,14 +2516,6 @@ static int destroy_grant_pte_mapping( goto failed; } - if ( unlikely(shadow_mode_enabled(d)) ) - { - struct domain_mmap_cache sh_mapcache; - domain_mmap_cache_init(&sh_mapcache); - shadow_l1_normal_pt_update(d, addr, l1e_empty(), &sh_mapcache); - domain_mmap_cache_destroy(&sh_mapcache); - } - put_page_type(page); failed: @@ -2536,31 +2532,22 @@ static int create_grant_va_mapping( struct domain *d = v->domain; ASSERT(spin_is_locked(&d->big_lock)); - ASSERT(!shadow_mode_refcounts(d)); - - /* - * This is actually overkill - we don't need to sync the L1 itself, - * just everything involved in getting to this L1 (i.e. we need - * linear_pg_table[l1_linear_offset(va)] to be in sync)... 
- */ - __shadow_sync_va(v, va); pl1e = &linear_pg_table[l1_linear_offset(va)]; if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) || - !update_l1e(pl1e, ol1e, _nl1e) ) + !update_l1e(pl1e, ol1e, _nl1e, + l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) ) return GNTST_general_error; - put_page_from_l1e(ol1e, d); - - if ( unlikely(shadow_mode_enabled(d)) ) - shadow_do_update_va_mapping(va, _nl1e, v); + if ( !shadow2_mode_refcounts(d) ) + put_page_from_l1e(ol1e, d); return GNTST_okay; } static int destroy_grant_va_mapping( - unsigned long addr, unsigned long frame) + unsigned long addr, unsigned long frame, struct domain *d) { l1_pgentry_t *pl1e, ol1e; @@ -2584,12 +2571,14 @@ static int destroy_grant_va_mapping( } /* Delete pagetable entry. */ - if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty())) ) + if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), + l2e_get_pfn(__linear_l2_table[l2_linear_offset(addr)]), + d->vcpu[0] /* Change for per-vcpu shadows */)) ) { MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e); return GNTST_general_error; } - + return 0; } @@ -2597,7 +2586,7 @@ int create_grant_host_mapping( unsigned long addr, unsigned long frame, unsigned int flags) { l1_pgentry_t pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS); - + if ( (flags & GNTMAP_application_map) ) l1e_add_flags(pte,_PAGE_USER); if ( !(flags & GNTMAP_readonly) ) @@ -2613,7 +2602,7 @@ int destroy_grant_host_mapping( { if ( flags & GNTMAP_contains_pte ) return destroy_grant_pte_mapping(addr, frame, current->domain); - return destroy_grant_va_mapping(addr, frame); + return destroy_grant_va_mapping(addr, frame, current->domain); } int steal_page( @@ -2675,46 +2664,44 @@ int do_update_va_mapping(unsigned long v perfc_incrc(calls_to_update_va); - if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) ) + if ( unlikely(!__addr_ok(va) && !shadow2_mode_external(d)) ) return -EINVAL; + if ( unlikely(shadow2_mode_refcounts(d)) ) + { + DPRINTK("Grant op on a 
shadow-refcounted domain\n"); + return -EINVAL; + } + LOCK_BIGLOCK(d); - if ( unlikely(shadow_mode_enabled(d)) ) - check_pagetable(v, "pre-va"); /* debug */ - - if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)], - val)) ) - rc = -EINVAL; - - if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) ) + if ( likely(rc == 0) && unlikely(shadow2_mode_enabled(d)) ) { if ( unlikely(this_cpu(percpu_mm_info).foreign && - (shadow_mode_translate(d) || - shadow_mode_translate( + (shadow2_mode_translate(d) || + shadow2_mode_translate( this_cpu(percpu_mm_info).foreign))) ) { /* * The foreign domain's pfn's are in a different namespace. There's - * not enough information in just a gpte to figure out how to + * not enough information in just a gpte to figure out how to * (re-)shadow this entry. */ domain_crash(d); } + } + + if ( unlikely(!mod_l1_entry( + &linear_pg_table[l1_linear_offset(va)], val, + l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]))) ) + rc = -EINVAL; - rc = shadow_do_update_va_mapping(va, val, v); - - check_pagetable(v, "post-va"); /* debug */ - } - switch ( flags & UVMF_FLUSHTYPE_MASK ) { case UVMF_TLB_FLUSH: switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) { case UVMF_LOCAL: - if ( unlikely(shadow_mode_enabled(d)) ) - shadow_sync_all(d); local_flush_tlb(); break; case UVMF_ALL: @@ -2733,9 +2720,9 @@ int do_update_va_mapping(unsigned long v switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) { case UVMF_LOCAL: - if ( unlikely(shadow_mode_enabled(d)) ) - shadow_invlpg(current, va); - local_flush_tlb_one(va); + if ( !shadow2_mode_enabled(d) + || (shadow2_invlpg(current, va) != 0) ) + local_flush_tlb_one(va); break; case UVMF_ALL: flush_tlb_one_mask(d->domain_dirty_cpumask, va); @@ -2807,8 +2794,6 @@ long set_gdt(struct vcpu *v, if ( entries > FIRST_RESERVED_GDT_ENTRY ) return -EINVAL; - - shadow_sync_all(d); /* Check the pages in the new GDT. 
*/ for ( i = 0; i < nr_pages; i++ ) { @@ -2912,23 +2897,12 @@ long do_update_descriptor(u64 pa, u64 de break; } - if ( shadow_mode_enabled(dom) ) - { - shadow_lock(dom); - - __mark_dirty(dom, mfn); - - if ( page_is_page_table(page) && !page_out_of_sync(page) ) - shadow_mark_mfn_out_of_sync(current, gmfn, mfn); - } + mark_dirty(dom, mfn); /* All is good so make the update. */ gdt_pent = map_domain_page(mfn); memcpy(&gdt_pent[offset], &d, 8); unmap_domain_page(gdt_pent); - - if ( shadow_mode_enabled(dom) ) - shadow_unlock(dom); put_page_type(page); @@ -2981,8 +2955,8 @@ long arch_memory_op(int op, XEN_GUEST_HA default: break; } - - if ( !shadow_mode_translate(d) || (mfn == 0) ) + + if ( !shadow2_mode_translate(d) || (mfn == 0) ) { put_domain(d); return -EINVAL; @@ -3011,7 +2985,7 @@ long arch_memory_op(int op, XEN_GUEST_HA guest_physmap_add_page(d, xatp.gpfn, mfn); UNLOCK_BIGLOCK(d); - + put_domain(d); break; @@ -3075,56 +3049,6 @@ long arch_memory_op(int op, XEN_GUEST_HA /************************* * Writable Pagetables */ - -/* Re-validate a given p.t. page, given its prior snapshot */ -int revalidate_l1( - struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot) -{ - l1_pgentry_t ol1e, nl1e; - int modified = 0, i; - - for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) - { - ol1e = snapshot[i]; - nl1e = l1page[i]; - - if ( likely(l1e_get_intpte(ol1e) == l1e_get_intpte(nl1e)) ) - continue; - - /* Update number of entries modified. */ - modified++; - - /* - * Fast path for PTEs that have merely been write-protected - * (e.g., during a Unix fork()). A strict reduction in privilege. - */ - if ( likely(l1e_get_intpte(ol1e) == (l1e_get_intpte(nl1e)|_PAGE_RW)) ) - { - if ( likely(l1e_get_flags(nl1e) & _PAGE_PRESENT) ) - put_page_type(mfn_to_page(l1e_get_pfn(nl1e))); - continue; - } - - if ( unlikely(!get_page_from_l1e(nl1e, d)) ) - { - /* - * Make the remaining p.t's consistent before crashing, so the - * reference counts are correct. 
- */ - memcpy(&l1page[i], &snapshot[i], - (L1_PAGETABLE_ENTRIES - i) * sizeof(l1_pgentry_t)); - - /* Crash the offending domain. */ - MEM_LOG("ptwr: Could not revalidate l1 page"); - domain_crash(d); - break; - } - - put_page_from_l1e(ol1e, d); - } - - return modified; -} static int ptwr_emulated_update( unsigned long addr, @@ -3136,7 +3060,8 @@ static int ptwr_emulated_update( unsigned long pfn; struct page_info *page; l1_pgentry_t pte, ol1e, nl1e, *pl1e; - struct domain *d = current->domain; + struct vcpu *v = current; + struct domain *d = v->domain; /* Aligned access only, thank you. */ if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) ) @@ -3192,8 +3117,26 @@ static int ptwr_emulated_update( nl1e = l1e_from_intpte(val); if ( unlikely(!get_page_from_l1e(nl1e, d)) ) { - MEM_LOG("ptwr_emulate: could not get_page_from_l1e()"); - return X86EMUL_UNHANDLEABLE; + if ( (CONFIG_PAGING_LEVELS == 3) && + (bytes == 4) && + !do_cmpxchg && + (l1e_get_flags(nl1e) & _PAGE_PRESENT) ) + { + /* + * If this is a half-write to a PAE PTE then we assume that the + * guest has simply got the two writes the wrong way round. We + * zap the PRESENT bit on the assumption the bottom half will be + * written immediately after we return to the guest. + */ + MEM_LOG("ptwr_emulate: fixing up invalid PAE PTE %"PRIpte"\n", + l1e_get_intpte(nl1e)); + l1e_remove_flags(nl1e, _PAGE_PRESENT); + } + else + { + MEM_LOG("ptwr_emulate: could not get_page_from_l1e()"); + return X86EMUL_UNHANDLEABLE; + } } /* Checked successfully: do the update (write or cmpxchg). 
*/ @@ -3201,20 +3144,30 @@ static int ptwr_emulated_update( pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK)); if ( do_cmpxchg ) { + if ( shadow2_mode_enabled(d) ) + shadow2_lock(d); ol1e = l1e_from_intpte(old); if ( cmpxchg((intpte_t *)pl1e, old, val) != old ) { + if ( shadow2_mode_enabled(d) ) + shadow2_unlock(d); unmap_domain_page(pl1e); put_page_from_l1e(nl1e, d); return X86EMUL_CMPXCHG_FAILED; } + if ( unlikely(shadow2_mode_enabled(v->domain)) ) + { + shadow2_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e); + shadow2_unlock(v->domain); + } } else { ol1e = *pl1e; - if ( !update_l1e(pl1e, ol1e, nl1e) ) + if ( !update_l1e(pl1e, ol1e, nl1e, page_to_mfn(page), v) ) BUG(); } + unmap_domain_page(pl1e); /* Finally, drop the old PTE. */ diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/mpparse.c --- a/xen/arch/x86/mpparse.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/mpparse.c Sun Aug 20 11:08:45 2006 -0400 @@ -107,7 +107,7 @@ static int __init mpf_checksum(unsigned * doing this .... */ -static int mpc_record; +static int mpc_record; static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; #ifdef CONFIG_X86_NUMAQ diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/nmi.c --- a/xen/arch/x86/nmi.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/nmi.c Sun Aug 20 11:08:45 2006 -0400 @@ -6,11 +6,11 @@ * Started by Ingo Molnar <mingo@xxxxxxxxxx> * * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. + * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. + * Mikael Pettersson : Power Management for local APIC NMI watchdog. + * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. Disable/enable API. + * Mikael Pettersson : PM converted to driver model. 
Disable/enable API. */ #include <xen/config.h> diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/oprofile/nmi_int.c --- a/xen/arch/x86/oprofile/nmi_int.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/oprofile/nmi_int.c Sun Aug 20 11:08:45 2006 -0400 @@ -59,7 +59,7 @@ static void nmi_cpu_save_registers(struc static void nmi_cpu_save_registers(struct op_msrs *msrs) { unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; + unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; @@ -180,7 +180,7 @@ static void nmi_restore_registers(struct static void nmi_restore_registers(struct op_msrs * msrs) { unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; + unsigned int const nr_ctrls = model->num_controls; struct op_msr * counters = msrs->counters; struct op_msr * controls = msrs->controls; unsigned int i; diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/oprofile/op_model_p4.c --- a/xen/arch/x86/oprofile/op_model_p4.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/oprofile/op_model_p4.c Sun Aug 20 11:08:45 2006 -0400 @@ -401,7 +401,7 @@ static unsigned long reset_value[NUM_COU static void p4_fill_in_addresses(struct op_msrs * const msrs) { - unsigned int i; + unsigned int i; unsigned int addr, stag; setup_num_counters(); diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/oprofile/xenoprof.c --- a/xen/arch/x86/oprofile/xenoprof.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/oprofile/xenoprof.c Sun Aug 20 11:08:45 2006 -0400 @@ -255,7 +255,7 @@ int reset_active(struct domain *d) active_ready[ind] = 0; active_domains[ind] = NULL; activated--; - put_domain(d); + put_domain(d); if ( activated <= 0 ) adomains = 0; @@ -316,7 +316,7 @@ int add_active_list (domid_t domid) if ( adomains >= MAX_OPROF_DOMAINS ) return -E2BIG; - d = find_domain_by_id(domid); + d = 
find_domain_by_id(domid); if ( d == NULL ) return -EINVAL; @@ -339,7 +339,7 @@ int add_passive_list(XEN_GUEST_HANDLE(vo if ( copy_from_guest(&passive, arg, 1) ) return -EFAULT; - d = find_domain_by_id(passive.domain_id); + d = find_domain_by_id(passive.domain_id); if ( d == NULL ) return -EINVAL; diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/setup.c Sun Aug 20 11:08:45 2006 -0400 @@ -44,7 +44,7 @@ boolean_param("nosmp", opt_nosmp); /* maxcpus: maximum number of CPUs to activate. */ static unsigned int max_cpus = NR_CPUS; -integer_param("maxcpus", max_cpus); +integer_param("maxcpus", max_cpus); /* opt_watchdog: If true, run a watchdog NMI on each processor. */ static int opt_watchdog = 0; @@ -532,8 +532,6 @@ void __init __start_xen(multiboot_info_t if ( opt_watchdog ) watchdog_enable(); - shadow_mode_init(); - /* initialize access control security module */ acm_init(&initrdidx, mbi, initial_images_start); diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Sun Aug 20 11:07:52 2006 -0400 +++ b/xen/arch/x86/smpboot.c Sun Aug 20 11:08:45 2006 -0400 @@ -896,7 +896,7 @@ static int __devinit do_boot_cpu(int api v = alloc_idle_vcpu(cpu); BUG_ON(v == NULL); - v->arch.monitor_table = pagetable_from_paddr(__pa(idle_pg_table)); + v->arch.cr3 = __pa(idle_pg_table); /* start_eip had better be page-aligned! */ _______________________________________________ Xen-ppc-devel mailing list Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ppc-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |