[XenPPC] [xenppc-unstable] [merge] with http://xenbits.xensource.com/xen-unstable.hg



# HG changeset patch
# User Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
# Node ID 4cffec02b4788bc74d8a0ed7560fadbf332892b1
# Parent  96d6f9cfed6e0736e44454c739eb02ee2d77a2e8
[merge] with http://xenbits.xensource.com/xen-unstable.hg
---
 xen/arch/x86/audit.c                                            |  984 --
 xen/arch/x86/shadow.c                                           | 4199 ---------
 xen/arch/x86/shadow32.c                                         | 3782 --------
 xen/arch/x86/shadow_guest32.c                                   |   16 
 xen/arch/x86/shadow_guest32pae.c                                |   16 
 xen/arch/x86/shadow_public.c                                    | 2138 ----
 xen/include/asm-x86/shadow_64.h                                 |  587 -
 xen/include/asm-x86/shadow_ops.h                                |  138 
 xen/include/asm-x86/shadow_public.h                             |   61 
 xen/include/xen/font.h                                          |   22 
 .hgignore                                                       |    8 
 extras/mini-os/console/xencons_ring.c                           |    8 
 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c               |    8 
 linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c                    |    6 
 linux-2.6-xen-sparse/arch/ia64/dig/setup.c                      |  110 
 linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c           |   12 
 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c             |   28 
 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c                 |    5 
 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c              |   35 
 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c              |    4 
 linux-2.6-xen-sparse/drivers/xen/blkback/common.h               |    8 
 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c               |   12 
 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c            |    3 
 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c                |   12 
 linux-2.6-xen-sparse/drivers/xen/blktap/common.h                |    4 
 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c                |    5 
 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c                  |   54 
 linux-2.6-xen-sparse/drivers/xen/netback/common.h               |    9 
 linux-2.6-xen-sparse/drivers/xen/netback/netback.c              |  274 
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c               |   39 
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c            |  685 -
 linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c               |    1 
 linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c              |    1 
 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c              |   10 
 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c               |    6 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c         |    2 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c            |    4 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c          |   62 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c             |   10 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h |   14 
 linux-2.6-xen-sparse/include/xen/balloon.h                      |    2 
 linux-2.6-xen-sparse/include/xen/hvm.h                          |    9 
 linux-2.6-xen-sparse/include/xen/xenbus.h                       |    3 
 tools/blktap/lib/Makefile                                       |    2 
 tools/examples/xmexample.hvm                                    |    4 
 tools/firmware/rombios/apmbios.S                                |    3 
 tools/firmware/rombios/rombios.c                                |    2 
 tools/ioemu/Makefile.target                                     |    1 
 tools/ioemu/hw/pc.c                                             |    3 
 tools/ioemu/hw/piix_pci.c                                       |    2 
 tools/ioemu/hw/xen_platform.c                                   |  138 
 tools/ioemu/patches/acpi-poweroff-support                       |    2 
 tools/ioemu/patches/acpi-support                                |   15 
 tools/ioemu/patches/acpi-timer-support                          |    4 
 tools/ioemu/patches/domain-destroy                              |    8 
 tools/ioemu/patches/domain-reset                                |   10 
 tools/ioemu/patches/domain-timeoffset                           |   12 
 tools/ioemu/patches/hypervisor-pit                              |    6 
 tools/ioemu/patches/ide-hd-multithread                          |    2 
 tools/ioemu/patches/ioemu-ia64                                  |   31 
 tools/ioemu/patches/qemu-allow-disable-sdl                      |    2 
 tools/ioemu/patches/qemu-daemonize                              |   16 
 tools/ioemu/patches/qemu-fix-memset-args                        |    2 
 tools/ioemu/patches/qemu-fix-write-to-disk-synchronous          |   12 
 tools/ioemu/patches/serial-non-block                            |    2 
 tools/ioemu/patches/series                                      |    2 
 tools/ioemu/patches/shadow-vram                                 |    4 
 tools/ioemu/patches/shared-vram                                 |   14 
 tools/ioemu/patches/support-xm-console                          |    2 
 tools/ioemu/patches/vnc-access-monitor-vt                       |    2 
 tools/ioemu/patches/vnc-cleanup                                 |    4 
 tools/ioemu/patches/vnc-display-find-unused                     |   10 
 tools/ioemu/patches/vnc-fixes                                   |   10 
 tools/ioemu/patches/vnc-start-vncviewer                         |   10 
 tools/ioemu/patches/vnc-title-domain-name                       |    2 
 tools/ioemu/patches/xen-mm                                      |   36 
 tools/ioemu/patches/xen-platform-device                         |   37 
 tools/ioemu/patches/xen-support-buffered-ioreqs                 |   26 
 tools/ioemu/patches/xenstore-block-device-config                |   23 
 tools/ioemu/patches/xenstore-write-vnc-port                     |   10 
 tools/ioemu/vl.c                                                |   76 
 tools/ioemu/vl.h                                                |    4 
 tools/libaio/src/Makefile                                       |    7 
 tools/libxc/xc_domain.c                                         |   13 
 tools/libxc/xc_hvm_build.c                                      |  176 
 tools/libxc/xc_linux_build.c                                    |    2 
 tools/libxc/xc_linux_save.c                                     |   18 
 tools/libxc/xenctrl.h                                           |    2 
 tools/misc/xc_shadow.c                                          |    2 
 tools/python/xen/lowlevel/xc/xc.c                               |   69 
 tools/python/xen/xend/XendDomain.py                             |   24 
 tools/python/xen/xend/XendDomainInfo.py                         |   75 
 tools/python/xen/xend/XendLogging.py                            |    2 
 tools/python/xen/xend/image.py                                  |   32 
 tools/python/xen/xend/server/DevController.py                   |   22 
 tools/python/xen/xend/server/XMLRPCServer.py                    |    4 
 tools/python/xen/xend/server/blkif.py                           |   19 
 tools/python/xen/xm/create.py                                   |    9 
 tools/python/xen/xm/main.py                                     |   23 
 tools/xenmon/Makefile                                           |   10 
 tools/xenstore/Makefile                                         |    2 
 tools/xentrace/Makefile                                         |    4 
 unmodified_drivers/linux-2.6/Makefile                           |    6 
 unmodified_drivers/linux-2.6/README                             |    7 
 unmodified_drivers/linux-2.6/blkfront/Kbuild                    |    5 
 unmodified_drivers/linux-2.6/mkbuildtree                        |   49 
 unmodified_drivers/linux-2.6/netfront/Kbuild                    |    4 
 unmodified_drivers/linux-2.6/overrides.mk                       |   12 
 unmodified_drivers/linux-2.6/platform-pci/Kbuild                |    7 
 unmodified_drivers/linux-2.6/platform-pci/evtchn.c              |  173 
 unmodified_drivers/linux-2.6/platform-pci/platform-pci.c        |  271 
 unmodified_drivers/linux-2.6/platform-pci/platform-pci.h        |   45 
 unmodified_drivers/linux-2.6/platform-pci/xen_support.c         |   39 
 unmodified_drivers/linux-2.6/xenbus/Kbuild                      |   10 
 xen/Rules.mk                                                    |    6 
 xen/acm/acm_core.c                                              |    4 
 xen/acm/acm_simple_type_enforcement_hooks.c                     |   28 
 xen/arch/ia64/Rules.mk                                          |    1 
 xen/arch/ia64/xen/domain.c                                      |   17 
 xen/arch/ia64/xen/mm.c                                          |    5 
 xen/arch/x86/Makefile                                           |   16 
 xen/arch/x86/Rules.mk                                           |    1 
 xen/arch/x86/acpi/boot.c                                        |    2 
 xen/arch/x86/apic.c                                             |   12 
 xen/arch/x86/boot/x86_32.S                                      |   28 
 xen/arch/x86/cpu/amd.c                                          |    2 
 xen/arch/x86/cpu/cyrix.c                                        |    2 
 xen/arch/x86/cpu/transmeta.c                                    |    2 
 xen/arch/x86/delay.c                                            |   14 
 xen/arch/x86/dmi_scan.c                                         |   10 
 xen/arch/x86/dom0_ops.c                                         |    4 
 xen/arch/x86/domain.c                                           |  109 
 xen/arch/x86/domain_build.c                                     |   19 
 xen/arch/x86/extable.c                                          |    4 
 xen/arch/x86/genapic/bigsmp.c                                   |    2 
 xen/arch/x86/genapic/es7000.h                                   |    4 
 xen/arch/x86/genapic/probe.c                                    |    2 
 xen/arch/x86/hvm/hvm.c                                          |   60 
 xen/arch/x86/hvm/i8259.c                                        |    6 
 xen/arch/x86/hvm/intercept.c                                    |    2 
 xen/arch/x86/hvm/platform.c                                     |    9 
 xen/arch/x86/hvm/svm/instrlen.c                                 |    2 
 xen/arch/x86/hvm/svm/svm.c                                      |  691 -
 xen/arch/x86/hvm/svm/vmcb.c                                     |   31 
 xen/arch/x86/hvm/svm/x86_32/exits.S                             |   12 
 xen/arch/x86/hvm/svm/x86_64/exits.S                             |   22 
 xen/arch/x86/hvm/vioapic.c                                      |    6 
 xen/arch/x86/hvm/vlapic.c                                       |    3 
 xen/arch/x86/hvm/vmx/vmcs.c                                     |   17 
 xen/arch/x86/hvm/vmx/vmx.c                                      |  258 
 xen/arch/x86/hvm/vmx/x86_32/exits.S                             |    2 
 xen/arch/x86/hvm/vmx/x86_64/exits.S                             |    2 
 xen/arch/x86/i387.c                                             |    2 
 xen/arch/x86/io_apic.c                                          |    2 
 xen/arch/x86/microcode.c                                        |    4 
 xen/arch/x86/mm.c                                               |  575 -
 xen/arch/x86/mpparse.c                                          |    2 
 xen/arch/x86/nmi.c                                              |    8 
 xen/arch/x86/oprofile/nmi_int.c                                 |    4 
 xen/arch/x86/oprofile/op_model_p4.c                             |    2 
 xen/arch/x86/oprofile/xenoprof.c                                |    6 
 xen/arch/x86/setup.c                                            |    4 
 xen/arch/x86/shadow2-common.c                                   | 3410 +++++++
 xen/arch/x86/shadow2.c                                          | 4492 ++++++++++
 xen/arch/x86/smpboot.c                                          |    2 
 xen/arch/x86/traps.c                                            |   44 
 xen/arch/x86/x86_32/domain_page.c                               |   33 
 xen/arch/x86/x86_32/entry.S                                     |  105 
 xen/arch/x86/x86_32/mm.c                                        |    3 
 xen/arch/x86/x86_64/entry.S                                     |   50 
 xen/arch/x86/x86_64/mm.c                                        |    5 
 xen/arch/x86/x86_64/traps.c                                     |   18 
 xen/common/acm_ops.c                                            |    1 
 xen/common/dom0_ops.c                                           |    6 
 xen/common/domain.c                                             |    2 
 xen/common/elf.c                                                |    1 
 xen/common/grant_table.c                                        |    4 
 xen/common/keyhandler.c                                         |   48 
 xen/common/memory.c                                             |    3 
 xen/common/rangeset.c                                           |    1 
 xen/common/sched_bvt.c                                          |    3 
 xen/common/sched_credit.c                                       |    2 
 xen/common/sched_sedf.c                                         |    9 
 xen/common/schedule.c                                           |    3 
 xen/common/timer.c                                              |    4 
 xen/common/trace.c                                              |    1 
 xen/drivers/Makefile                                            |    2 
 xen/drivers/char/console.c                                      |  209 
 xen/drivers/char/serial.c                                       |    1 
 xen/drivers/video/font.h                                        |   22 
 xen/drivers/video/font_8x14.c                                   |    2 
 xen/drivers/video/font_8x16.c                                   |    2 
 xen/drivers/video/font_8x8.c                                    |    2 
 xen/drivers/video/vga.c                                         |  220 
 xen/include/acm/acm_core.h                                      |    2 
 xen/include/asm-ia64/config.h                                   |    2 
 xen/include/asm-powerpc/shadow.h                                |    1 
 xen/include/asm-x86/acpi.h                                      |    2 
 xen/include/asm-x86/bitops.h                                    |   18 
 xen/include/asm-x86/config.h                                    |   24 
 xen/include/asm-x86/domain.h                                    |   99 
 xen/include/asm-x86/genapic.h                                   |    8 
 xen/include/asm-x86/grant_table.h                               |    4 
 xen/include/asm-x86/hvm/hvm.h                                   |   35 
 xen/include/asm-x86/hvm/support.h                               |   15 
 xen/include/asm-x86/hvm/svm/vmcb.h                              |   11 
 xen/include/asm-x86/hvm/vcpu.h                                  |   13 
 xen/include/asm-x86/hvm/vmx/vmcs.h                              |    4 
 xen/include/asm-x86/hvm/vmx/vmx.h                               |   49 
 xen/include/asm-x86/io.h                                        |    1 
 xen/include/asm-x86/mach-es7000/mach_mpparse.h                  |    2 
 xen/include/asm-x86/mach-generic/mach_mpparse.h                 |    4 
 xen/include/asm-x86/mm.h                                        |  145 
 xen/include/asm-x86/msr.h                                       |    6 
 xen/include/asm-x86/page-guest32.h                              |    7 
 xen/include/asm-x86/page.h                                      |   37 
 xen/include/asm-x86/perfc_defn.h                                |   53 
 xen/include/asm-x86/processor.h                                 |   17 
 xen/include/asm-x86/shadow.h                                    | 1791 ---
 xen/include/asm-x86/shadow2-multi.h                             |  116 
 xen/include/asm-x86/shadow2-private.h                           |  593 +
 xen/include/asm-x86/shadow2-types.h                             |  705 +
 xen/include/asm-x86/shadow2.h                                   |  627 +
 xen/include/asm-x86/string.h                                    |    2 
 xen/include/asm-x86/uaccess.h                                   |    6 
 xen/include/asm-x86/x86_32/page-2level.h                        |    1 
 xen/include/asm-x86/x86_32/page-3level.h                        |    3 
 xen/include/asm-x86/x86_64/page.h                               |    5 
 xen/include/public/arch-x86_32.h                                |   22 
 xen/include/public/arch-x86_64.h                                |   22 
 xen/include/public/dom0_ops.h                                   |   16 
 xen/include/public/hvm/e820.h                                   |    5 
 xen/include/public/xen.h                                        |   14 
 xen/include/xen/domain_page.h                                   |   13 
 xen/include/xen/gdbstub.h                                       |   10 
 xen/include/xen/keyhandler.h                                    |    6 
 xen/include/xen/lib.h                                           |    6 
 xen/include/xen/list.h                                          |   12 
 xen/include/xen/mm.h                                            |    3 
 xen/include/xen/sched.h                                         |    5 
 xen/include/xen/vga.h                                           |   14 
 241 files changed, 14239 insertions(+), 16143 deletions(-)

diff -r 96d6f9cfed6e -r 4cffec02b478 .hgignore
--- a/.hgignore Sun Aug 20 11:07:52 2006 -0400
+++ b/.hgignore Sun Aug 20 11:08:45 2006 -0400
@@ -151,7 +151,7 @@
 ^tools/vtpm_manager/manager/vtpm_managerd$
 ^tools/xcutils/xc_restore$
 ^tools/xcutils/xc_save$
-^tools/xenmon/setmask$
+^tools/xenmon/xentrace_setmask$
 ^tools/xenmon/xenbaked$
 ^tools/xenstat/xentop/xentop$
 ^tools/xenstore/testsuite/tmp/.*$
@@ -172,7 +172,7 @@
 ^tools/xenstore/xs_tdb_dump$
 ^tools/xenstore/xs_test$
 ^tools/xenstore/xs_watch_stress$
-^tools/xentrace/setsize$
+^tools/xentrace/xentrace_setsize$
 ^tools/xentrace/tbctl$
 ^tools/xentrace/xenctx$
 ^tools/xentrace/xentrace$
@@ -204,3 +204,7 @@
 ^xen/arch/powerpc/firmware$
 ^xen/arch/powerpc/firmware_image$
 ^xen/arch/powerpc/xen\.lds$
+^unmodified_drivers/linux-2.6/\.tmp_versions
+^unmodified_drivers/linux-2.6/.*\.cmd$
+^unmodified_drivers/linux-2.6/.*\.ko$
+^unmodified_drivers/linux-2.6/.*\.mod\.c$
diff -r 96d6f9cfed6e -r 4cffec02b478 extras/mini-os/console/xencons_ring.c
--- a/extras/mini-os/console/xencons_ring.c     Sun Aug 20 11:07:52 2006 -0400
+++ b/extras/mini-os/console/xencons_ring.c     Sun Aug 20 11:08:45 2006 -0400
@@ -14,13 +14,13 @@
 
 static inline struct xencons_interface *xencons_interface(void)
 {
-    return mfn_to_virt(start_info.console_mfn);
+    return mfn_to_virt(start_info.console.domU.mfn);
 }
 
 static inline void notify_daemon(void)
 {
     /* Use evtchn: this is called early, before irq is set up. */
-    notify_remote_via_evtchn(start_info.console_evtchn);
+    notify_remote_via_evtchn(start_info.console.domU.evtchn);
 }
 
 int xencons_ring_send_no_notify(const char *data, unsigned len)
@@ -80,10 +80,10 @@ int xencons_ring_init(void)
 {
        int err;
 
-       if (!start_info.console_evtchn)
+       if (!start_info.console.domU.evtchn)
                return 0;
 
-       err = bind_evtchn(start_info.console_evtchn, handle_input,
+       err = bind_evtchn(start_info.console.domU.evtchn, handle_input,
                          NULL);
        if (err <= 0) {
                printk("XEN console request chn bind failed %i\n", err);
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Sun Aug 20 11:08:45 2006 -0400
@@ -184,7 +184,6 @@ static struct resource code_resource = {
        .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
 };
 
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 static struct resource system_rom_resource = {
        .name   = "System ROM",
        .start  = 0xf0000,
@@ -240,7 +239,6 @@ static struct resource video_rom_resourc
        .end    = 0xc7fff,
        .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
 };
-#endif
 
 static struct resource video_ram_resource = {
        .name   = "Video RAM area",
@@ -299,7 +297,6 @@ static struct resource standard_io_resou
 #define STANDARD_IO_RESOURCES \
        (sizeof standard_io_resources / sizeof standard_io_resources[0])
 
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
 
 static int __init romchecksum(unsigned char *rom, unsigned long length)
@@ -317,9 +314,11 @@ static void __init probe_roms(void)
        unsigned char *rom;
        int           i;
 
+#ifdef CONFIG_XEN
        /* Nothing to do if not running in dom0. */
        if (!is_initial_xendomain())
                return;
+#endif
 
        /* video rom */
        upper = adapter_rom_resources[0].start;
@@ -379,7 +378,6 @@ static void __init probe_roms(void)
                start = adapter_rom_resources[i++].end & ~2047UL;
        }
 }
-#endif
 
 /*
  * Point at the empty zero page to start with. We map the real shared_info
@@ -1359,9 +1357,7 @@ legacy_init_iomem_resources(struct e820e
 {
        int i;
 
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
        probe_roms();
-#endif
 
        for (i = 0; i < nr_map; i++) {
                struct resource *res;
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c      Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c      Sun Aug 20 11:08:45 2006 -0400
@@ -95,7 +95,10 @@ static struct irq_routing_table * __init
        u8 *addr;
        struct irq_routing_table *rt;
 
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#ifdef CONFIG_XEN
+       if (!is_initial_xendomain())
+               return NULL;
+#endif
        if (pirq_table_addr) {
                rt = pirq_check_routing_table((u8 *) 
isa_bus_to_virt(pirq_table_addr));
                if (rt)
@@ -107,7 +110,6 @@ static struct irq_routing_table * __init
                if (rt)
                        return rt;
        }
-#endif
        
        return NULL;
 }
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c     Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/genapic_xen.c     Sun Aug 20 11:08:45 2006 -0400
@@ -17,14 +17,8 @@
 #include <linux/kernel.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 #include <asm/smp.h>
 #include <asm/ipi.h>
-#else
-#include <asm/apic.h>
-#include <asm/apicdef.h>
-#include <asm/genapic.h>
-#endif
 #include <xen/evtchn.h>
 
 DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
@@ -118,14 +112,12 @@ static void xen_send_IPI_mask(cpumask_t 
        local_irq_restore(flags);
 }
 
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 static int xen_apic_id_registered(void)
 {
        /* better be set */
        Dprintk("%s\n", __FUNCTION__);
        return physid_isset(smp_processor_id(), phys_cpu_present_map);
 }
-#endif
 
 static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask)
 {
@@ -144,15 +136,11 @@ static unsigned int phys_pkg_id(int inde
 
 struct genapic apic_xen =  {
        .name = "xen",
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
        .int_delivery_mode = dest_LowestPrio,
-#endif
        .int_dest_mode = (APIC_DEST_LOGICAL != 0),
        .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
        .target_cpus = xen_target_cpus,
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
        .apic_id_registered = xen_apic_id_registered,
-#endif
        .init_apic_ldr = xen_init_apic_ldr,
        .send_IPI_all = xen_send_IPI_all,
        .send_IPI_allbutself = xen_send_IPI_allbutself,
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c       Sun Aug 20 11:08:45 2006 -0400
@@ -189,7 +189,6 @@ struct resource code_resource = {
 
 #define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
 
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
 static struct resource system_rom_resource = {
        .name = "System ROM",
        .start = 0xf0000,
@@ -218,19 +217,16 @@ static struct resource adapter_rom_resou
        { .name = "Adapter ROM", .start = 0, .end = 0,
                .flags = IORESOURCE_ROM }
 };
-#endif
 
 #define ADAPTER_ROM_RESOURCES \
        (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
 
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
 static struct resource video_rom_resource = {
        .name = "Video ROM",
        .start = 0xc0000,
        .end = 0xc7fff,
        .flags = IORESOURCE_ROM,
 };
-#endif
 
 static struct resource video_ram_resource = {
        .name = "Video RAM area",
@@ -239,7 +235,6 @@ static struct resource video_ram_resourc
        .flags = IORESOURCE_RAM,
 };
 
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
 
 static int __init romchecksum(unsigned char *rom, unsigned long length)
@@ -256,6 +251,12 @@ static void __init probe_roms(void)
        unsigned long start, length, upper;
        unsigned char *rom;
        int           i;
+
+#ifdef CONFIG_XEN
+       /* Nothing to do if not running in dom0. */
+       if (!is_initial_xendomain())
+               return;
+#endif
 
        /* video rom */
        upper = adapter_rom_resources[0].start;
@@ -315,7 +316,6 @@ static void __init probe_roms(void)
                start = adapter_rom_resources[i++].end & ~2047UL;
        }
 }
-#endif
 
 static __init void parse_cmdline_early (char ** cmdline_p)
 {
@@ -625,11 +625,8 @@ void __init setup_arch(char **cmdline_p)
 void __init setup_arch(char **cmdline_p)
 {
        unsigned long kernel_end;
-
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
        struct e820entry *machine_e820;
        struct xen_memory_map memmap;
-#endif
 
 #ifdef CONFIG_XEN
        /* Register a call for panic conditions. */
@@ -936,8 +933,8 @@ void __init setup_arch(char **cmdline_p)
         * Request address space for all standard RAM and ROM resources
         * and also for regions reported as reserved by the e820.
         */
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
        probe_roms();
+#ifdef CONFIG_XEN
        if (is_initial_xendomain()) {
                machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
 
@@ -948,13 +945,8 @@ void __init setup_arch(char **cmdline_p)
 
                e820_reserve_resources(machine_e820, memmap.nr_entries);
        } else
-               e820_reserve_resources(e820.map, e820.nr_map);
-#elif defined(CONFIG_XEN)
+#endif
        e820_reserve_resources(e820.map, e820.nr_map);
-#else
-       probe_roms();
-       e820_reserve_resources(e820.map, e820.nr_map);
-#endif
 
        request_resource(&iomem_resource, &video_ram_resource);
 
@@ -965,12 +957,12 @@ void __init setup_arch(char **cmdline_p)
                request_resource(&ioport_resource, &standard_io_resources[i]);
        }
 
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+#ifdef CONFIG_XEN
        if (is_initial_xendomain()) {
                e820_setup_gap(machine_e820, memmap.nr_entries);
                free_bootmem(__pa(machine_e820), PAGE_SIZE);
        }
-#elif !defined(CONFIG_XEN)
+#else
        e820_setup_gap(e820.map, e820.nr_map);
 #endif
 
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Sun Aug 20 11:08:45 2006 -0400
@@ -343,7 +343,6 @@ static void backend_changed(struct xenbu
        case XenbusStateInitialising:
        case XenbusStateInitWait:
        case XenbusStateInitialised:
-       case XenbusStateUnknown:
                break;
 
        case XenbusStateConnected:
@@ -354,10 +353,10 @@ static void backend_changed(struct xenbu
                tpmif_set_connected_state(tp, 0);
                break;
 
+       case XenbusStateUnknown:
        case XenbusStateClosed:
-               if (tp->is_suspended == 0) {
+               if (tp->is_suspended == 0)
                        device_unregister(&dev->dev);
-               }
                xenbus_switch_state(dev, XenbusStateClosed);
                break;
        }
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Sun Aug 20 11:08:45 2006 -0400
@@ -76,7 +76,7 @@ static unsigned long target_pages;
 static unsigned long target_pages;
 
 /* We increase/decrease in batches which fit in a page */
-static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; 
+static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
 /* VM /proc information for memory */
 extern unsigned long totalram_pages;
@@ -440,20 +440,16 @@ static int balloon_read(char *page, char
                "Requested target:   %8lu kB\n"
                "Low-mem balloon:    %8lu kB\n"
                "High-mem balloon:   %8lu kB\n"
+               "Driver pages:       %8lu kB\n"
                "Xen hard limit:     ",
                PAGES2KB(current_pages), PAGES2KB(target_pages), 
-               PAGES2KB(balloon_low), PAGES2KB(balloon_high));
-
-       if (hard_limit != ~0UL) {
-               len += sprintf(
-                       page + len, 
-                       "%8lu kB (inc. %8lu kB driver headroom)\n",
-                       PAGES2KB(hard_limit), PAGES2KB(driver_pages));
-       } else {
-               len += sprintf(
-                       page + len,
-                       "     ??? kB\n");
-       }
+               PAGES2KB(balloon_low), PAGES2KB(balloon_high),
+               PAGES2KB(driver_pages));
+
+       if (hard_limit != ~0UL)
+               len += sprintf(page + len, "%8lu kB\n", PAGES2KB(hard_limit));
+       else
+               len += sprintf(page + len, "     ??? kB\n");
 
        *eof = 1;
        return len;
@@ -610,8 +606,21 @@ void balloon_dealloc_empty_page_range(
        schedule_work(&balloon_worker);
 }
 
+void balloon_release_driver_page(struct page *page)
+{
+       unsigned long flags;
+
+       balloon_lock(flags);
+       balloon_append(page);
+       driver_pages--;
+       balloon_unlock(flags);
+
+       schedule_work(&balloon_worker);
+}
+
 EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
 EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range);
 EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range);
+EXPORT_SYMBOL_GPL(balloon_release_driver_page);
 
 MODULE_LICENSE("Dual BSD/GPL");
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Sun Aug 20 11:08:45 2006 -0400
@@ -341,7 +341,7 @@ static void dispatch_rw_block_io(blkif_t
                                 blkif_request_t *req,
                                 pending_req_t *pending_req)
 {
-       extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
+       extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
        int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct phys_req preq;
@@ -409,7 +409,7 @@ static void dispatch_rw_block_io(blkif_t
                DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
                        operation == READ ? "read" : "write",
                        preq.sector_number,
-                       preq.sector_number + preq.nr_sects, preq.dev); 
+                       preq.sector_number + preq.nr_sects, preq.dev);
                goto fail_flush;
        }
 
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Sun Aug 20 11:08:45 2006 -0400
@@ -55,9 +55,9 @@ struct vbd {
        unsigned char  type;        /* VDISK_xxx */
        u32            pdevice;     /* phys device that this vbd maps to */
        struct block_device *bdev;
-}; 
+};
 
-struct backend_info; 
+struct backend_info;
 
 typedef struct blkif_st {
        /* Unique identifier for this interface. */
@@ -72,7 +72,7 @@ typedef struct blkif_st {
        /* The VBD attached to this interface. */
        struct vbd        vbd;
        /* Back pointer to the backend_info. */
-       struct backend_info *be; 
+       struct backend_info *be;
        /* Private fields. */
        spinlock_t       blk_ring_lock;
        atomic_t         refcnt;
@@ -122,7 +122,7 @@ struct phys_req {
        blkif_sector_t       sector_number;
 };
 
-int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); 
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
 
 void blkif_interface_init(void);
 
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Sun Aug 20 11:08:45 2006 -0400
@@ -194,7 +194,7 @@ static int blkback_probe(struct xenbus_d
        }
 
        /* setup back pointer */
-       be->blkif->be = be; 
+       be->blkif->be = be;
 
        err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
                                 &be->backend_watch, backend_changed);
@@ -287,7 +287,7 @@ static void backend_changed(struct xenbu
                }
 
                /* We're potentially connected now */
-               update_blkif_status(be->blkif); 
+               update_blkif_status(be->blkif);
        }
 }
 
@@ -305,6 +305,11 @@ static void frontend_changed(struct xenb
 
        switch (frontend_state) {
        case XenbusStateInitialising:
+               if (dev->state == XenbusStateClosing) {
+                       printk("%s: %s: prepare for reconnect\n",
+                              __FUNCTION__, dev->nodename);
+                       xenbus_switch_state(dev, XenbusStateInitWait);
+               }
                break;
 
        case XenbusStateInitialised:
@@ -326,12 +331,11 @@ static void frontend_changed(struct xenb
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
+       case XenbusStateUnknown:
        case XenbusStateClosed:
                device_unregister(&dev->dev);
                break;
 
-       case XenbusStateUnknown:
-       case XenbusStateInitWait:
        default:
                xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
                                 frontend_state);
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Sun Aug 20 11:08:45 2006 -0400
@@ -46,6 +46,7 @@
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
 #include <asm/hypervisor.h>
+#include <asm/maddr.h>
 
 #define BLKIF_STATE_DISCONNECTED 0
 #define BLKIF_STATE_CONNECTED    1
@@ -255,10 +256,10 @@ static void backend_changed(struct xenbu
        DPRINTK("blkfront:backend_changed.\n");
 
        switch (backend_state) {
-       case XenbusStateUnknown:
        case XenbusStateInitialising:
        case XenbusStateInitWait:
        case XenbusStateInitialised:
+       case XenbusStateUnknown:
        case XenbusStateClosed:
                break;
 
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Sun Aug 20 11:08:45 2006 -0400
@@ -157,7 +157,7 @@ typedef unsigned int PEND_RING_IDX;
 typedef unsigned int PEND_RING_IDX;
 
 static inline int MASK_PEND_IDX(int i) { 
-       return (i & (MAX_PENDING_REQS-1)); 
+       return (i & (MAX_PENDING_REQS-1));
 }
 
 static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
@@ -754,7 +754,7 @@ static int req_increase(void)
        if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) {
                kfree(pending_reqs[mmap_alloc]);
                kfree(pending_addrs[mmap_alloc]);
-               WPRINTK("%s: out of memory\n", __FUNCTION__); 
+               WPRINTK("%s: out of memory\n", __FUNCTION__);
                ret = -ENOMEM;
                goto done;
        }
@@ -1051,7 +1051,7 @@ static int blktap_read_ufe_ring(int idx)
                        unsigned long kvaddr, uvaddr;
                        struct page **map = info->vma->vm_private_data;
                        struct page *pg;
-                       int offset; 
+                       int offset;
 
                        uvaddr  = MMAP_VADDR(info->user_vstart, usr_idx, j);
                        kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
@@ -1063,7 +1063,7 @@ static int blktap_read_ufe_ring(int idx)
                                >> PAGE_SHIFT;
                        map[offset] = NULL;
                }
-               fast_flush_area(pending_req, pending_idx, usr_idx, idx); 
+               fast_flush_area(pending_req, pending_idx, usr_idx, idx);
                make_response(blkif, pending_req->id, resp->operation,
                              resp->status);
                info->idx_map[usr_idx] = INVALID_REQ;
@@ -1118,7 +1118,7 @@ static int do_block_io_op(blkif_t *blkif
                               "ring does not exist!\n");
                        print_dbug = 0; /*We only print this message once*/
                }
-               return 1; 
+               return 1;
        }
 
        info = tapfds[blkif->dev_num];
@@ -1185,7 +1185,7 @@ static void dispatch_rw_block_io(blkif_t
                                 blkif_request_t *req,
                                 pending_req_t *pending_req)
 {
-       extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
+       extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
        int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
        unsigned int nseg;
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blktap/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h  Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h  Sun Aug 20 11:08:45 2006 -0400
@@ -49,7 +49,7 @@
 
 #define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
 
-struct backend_info; 
+struct backend_info;
 
 typedef struct blkif_st {
        /* Unique identifier for this interface. */
@@ -62,7 +62,7 @@ typedef struct blkif_st {
        blkif_back_ring_t blk_ring;
        struct vm_struct *blk_ring_area;
        /* Back pointer to the backend_info. */
-       struct backend_info *be; 
+       struct backend_info *be;
        /* Private fields. */
        spinlock_t       blk_ring_lock;
        atomic_t         refcnt;
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c  Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c  Sun Aug 20 11:08:45 2006 -0400
@@ -174,7 +174,7 @@ static int blktap_probe(struct xenbus_de
        }
 
        /* setup back pointer */
-       be->blkif->be = be; 
+       be->blkif->be = be;
        be->blkif->sectors = 0;
 
        /* set a watch on disk info, waiting for userspace to update details*/
@@ -267,12 +267,11 @@ static void tap_frontend_changed(struct 
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
+       case XenbusStateUnknown:
        case XenbusStateClosed:
                device_unregister(&dev->dev);
                break;
 
-       case XenbusStateUnknown:
-       case XenbusStateInitWait:
        default:
                xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
                                 frontend_state);
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Sun Aug 20 11:08:45 2006 -0400
@@ -41,6 +41,8 @@
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/synch_bitops.h>
+#include <asm/io.h>
+#include <xen/interface/memory.h>
 
 /* External tools reserve first few grant table entries. */
 #define NR_RESERVED_ENTRIES 8
@@ -350,6 +352,8 @@ void gnttab_cancel_free_callback(struct 
 }
 EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
 
+#ifdef CONFIG_XEN
+
 #ifndef __ia64__
 static int map_pte_fn(pte_t *pte, struct page *pmd_page,
                      unsigned long addr, void *data)
@@ -410,17 +414,53 @@ int gnttab_resume(void)
 
 int gnttab_suspend(void)
 {
-
 #ifndef __ia64__
        apply_to_page_range(&init_mm, (unsigned long)shared,
                            PAGE_SIZE * NR_GRANT_FRAMES,
                            unmap_pte_fn, NULL);
 #endif
-
-       return 0;
-}
-
-static int __init gnttab_init(void)
+       return 0;
+}
+
+#else /* !CONFIG_XEN */
+
+#include <platform-pci.h>
+
+int gnttab_resume(void)
+{
+       unsigned long frames;
+       struct xen_add_to_physmap xatp;
+       unsigned int i;
+
+       frames = alloc_xen_mmio(PAGE_SIZE * NR_GRANT_FRAMES);
+
+       for (i = 0; i < NR_GRANT_FRAMES; i++) {
+               xatp.domid = DOMID_SELF;
+               xatp.idx = i;
+               xatp.space = XENMAPSPACE_grant_table;
+               xatp.gpfn = (frames >> PAGE_SHIFT) + i;
+               if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+                       BUG();
+       }
+
+       shared = ioremap(frames, PAGE_SIZE * NR_GRANT_FRAMES);
+       if (shared == NULL) {
+               printk("error to ioremap gnttab share frames\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+int gnttab_suspend(void)
+{
+       iounmap(shared);
+       return 0;
+}
+
+#endif /* !CONFIG_XEN */
+
+int __init gnttab_init(void)
 {
        int i;
 
@@ -439,4 +479,6 @@ static int __init gnttab_init(void)
        return 0;
 }
 
+#ifdef CONFIG_XEN
 core_initcall(gnttab_init);
+#endif
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Sun Aug 20 11:08:45 2006 -0400
@@ -64,9 +64,9 @@ typedef struct netif_st {
 
        /* Physical parameters of the comms window. */
        grant_handle_t   tx_shmem_handle;
-       grant_ref_t      tx_shmem_ref; 
+       grant_ref_t      tx_shmem_ref;
        grant_handle_t   rx_shmem_handle;
-       grant_ref_t      rx_shmem_ref; 
+       grant_ref_t      rx_shmem_ref;
        unsigned int     evtchn;
        unsigned int     irq;
 
@@ -78,7 +78,10 @@ typedef struct netif_st {
 
        /* Set of features that can be turned on in dev->features. */
        int features;
-       int can_queue;
+
+       /* Internal feature information. */
+       int can_queue:1;        /* can queue packets for receiver? */
+       int copying_receiver:1; /* copy packets to receiver?       */
 
        /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
        RING_IDX rx_req_cons_peek;
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Sun Aug 20 11:08:45 2006 -0400
@@ -43,6 +43,7 @@ struct netbk_rx_meta {
 struct netbk_rx_meta {
        skb_frag_t frag;
        int id;
+       int copy:1;
 };
 
 static void netif_idx_release(u16 pending_idx);
@@ -72,6 +73,8 @@ static unsigned long mmap_vstart;
 static unsigned long mmap_vstart;
 #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
 
+static void *rx_mmap_area;
+
 #define PKT_PROT_LEN 64
 
 static struct {
@@ -277,12 +280,11 @@ int netif_be_start_xmit(struct sk_buff *
                goto drop;
        }
 
-       /*
-        * We do not copy the packet unless:
-        *  1. The data -- including any in fragments -- is shared; or
-        *  2. The data is not allocated from our special cache.
-        */
-       if (!is_flippable_skb(skb)) {
+       /* Copy the packet here if it's destined for a flipping
+          interface but isn't flippable (e.g. extra references to
+          data)
+       */
+       if (!netif->copying_receiver && !is_flippable_skb(skb)) {
                struct sk_buff *nskb = netbk_copy_skb(skb);
                if ( unlikely(nskb == NULL) )
                        goto drop;
@@ -328,7 +330,7 @@ static void xen_network_done_notify(void
 /* 
  * Add following to poll() function in NAPI driver (Tigon3 is example):
  *  if ( xen_network_done() )
- *      tg3_enable_ints(tp); 
+ *      tg3_enable_ints(tp);
  */
 int xen_network_done(void)
 {
@@ -340,49 +342,74 @@ struct netrx_pending_operations {
        unsigned trans_prod, trans_cons;
        unsigned mmu_prod, mmu_cons;
        unsigned mcl_prod, mcl_cons;
+       unsigned copy_prod, copy_cons;
        unsigned meta_prod, meta_cons;
        mmu_update_t *mmu;
        gnttab_transfer_t *trans;
+       gnttab_copy_t *copy;
        multicall_entry_t *mcl;
        struct netbk_rx_meta *meta;
 };
 
-static u16 netbk_gop_frag(netif_t *netif, struct page *page,
-                         int i, struct netrx_pending_operations *npo)
+/* Set up the grant operations for this fragment.  If it's a flipping
+   interface, we also set up the unmap request from here. */
+static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
+                         int i, struct netrx_pending_operations *npo,
+                         struct page *page, unsigned long size,
+                         unsigned long offset)
 {
        mmu_update_t *mmu;
        gnttab_transfer_t *gop;
+       gnttab_copy_t *copy_gop;
        multicall_entry_t *mcl;
        netif_rx_request_t *req;
        unsigned long old_mfn, new_mfn;
 
        old_mfn = virt_to_mfn(page_address(page));
 
-       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-               new_mfn = alloc_mfn();
-
-               /*
-                * Set the new P2M table entry before reassigning
-                * the old data page. Heed the comment in
-                * pgtable-2level.h:pte_page(). :-)
-                */
-               set_phys_to_machine(page_to_pfn(page), new_mfn);
-
-               mcl = npo->mcl + npo->mcl_prod++;
-               MULTI_update_va_mapping(mcl, (unsigned long)page_address(page),
-                                       pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
-
-               mmu = npo->mmu + npo->mmu_prod++;
-               mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
-                       MMU_MACHPHYS_UPDATE;
-               mmu->val = page_to_pfn(page);
-       }
-
        req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
-       gop = npo->trans + npo->trans_prod++;
-       gop->mfn = old_mfn;
-       gop->domid = netif->domid;
-       gop->ref = req->gref;
+       if (netif->copying_receiver) {
+               /* The fragment needs to be copied rather than
+                  flipped. */
+               meta->copy = 1;
+               copy_gop = npo->copy + npo->copy_prod++;
+               copy_gop->source.domid = DOMID_SELF;
+               copy_gop->source.offset = offset;
+               copy_gop->source.u.gmfn = old_mfn;
+               copy_gop->dest.domid = netif->domid;
+               copy_gop->dest.offset = 0;
+               copy_gop->dest.u.ref = req->gref;
+               copy_gop->len = size;
+               copy_gop->flags = GNTCOPY_dest_gref;
+       } else {
+               meta->copy = 0;
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       new_mfn = alloc_mfn();
+
+                       /*
+                        * Set the new P2M table entry before
+                        * reassigning the old data page. Heed the
+                        * comment in pgtable-2level.h:pte_page(). :-)
+                        */
+                       set_phys_to_machine(page_to_pfn(page), new_mfn);
+
+                       mcl = npo->mcl + npo->mcl_prod++;
+                       MULTI_update_va_mapping(mcl,
+                                            (unsigned long)page_address(page),
+                                            pfn_pte_ma(new_mfn, PAGE_KERNEL),
+                                            0);
+
+                       mmu = npo->mmu + npo->mmu_prod++;
+                       mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+                               MMU_MACHPHYS_UPDATE;
+                       mmu->val = page_to_pfn(page);
+               }
+
+               gop = npo->trans + npo->trans_prod++;
+               gop->mfn = old_mfn;
+               gop->domid = netif->domid;
+               gop->ref = req->gref;
+       }
        return req->id;
 }
 
@@ -403,18 +430,21 @@ static void netbk_gop_skb(struct sk_buff
        for (i = 0; i < nr_frags; i++) {
                meta = npo->meta + npo->meta_prod++;
                meta->frag = skb_shinfo(skb)->frags[i];
-               meta->id = netbk_gop_frag(netif, meta->frag.page,
-                                         i + extra, npo);
+               meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
+                                         meta->frag.page,
+                                         meta->frag.size,
+                                         meta->frag.page_offset);
        }
 
        /*
         * This must occur at the end to ensure that we don't trash
         * skb_shinfo until we're done.
         */
-       head_meta->id = netbk_gop_frag(netif,
+       head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
                                       virt_to_page(skb->data),
-                                      0,
-                                      npo);
+                                      skb_headlen(skb),
+                                      offset_in_page(skb->data));
+
        netif->rx.req_cons += nr_frags + extra;
 }
 
@@ -430,32 +460,43 @@ static inline void netbk_free_pages(int 
    used to set up the operations on the top of
    netrx_pending_operations, which have since been done.  Check that
    they didn't give any errors and advance over them. */
-static int netbk_check_gop(int nr_frags, domid_t domid, int count,
+static int netbk_check_gop(int nr_frags, domid_t domid,
                           struct netrx_pending_operations *npo)
 {
        multicall_entry_t *mcl;
        gnttab_transfer_t *gop;
+       gnttab_copy_t     *copy_op;
        int status = NETIF_RSP_OKAY;
        int i;
 
        for (i = 0; i <= nr_frags; i++) {
-               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                       mcl = npo->mcl + npo->mcl_cons++;
-                       /* The update_va_mapping() must not fail. */
-                       BUG_ON(mcl->result != 0);
-               }
-
-               gop = npo->trans + npo->trans_cons++;
-               /* Check the reassignment error code. */
-               if (gop->status != 0) {
-                       DPRINTK("Bad status %d from grant transfer to DOM%u\n",
-                               gop->status, domid);
-                       /*
-                        * Page no longer belongs to us unless GNTST_bad_page,
-                        * but that should be a fatal error anyway.
-                        */
-                       BUG_ON(gop->status == GNTST_bad_page);
-                       status = NETIF_RSP_ERROR;
+               if (npo->meta[npo->meta_cons + i].copy) {
+                       copy_op = npo->copy + npo->copy_cons++;
+                       if (copy_op->status != GNTST_okay) {
+                               DPRINTK("Bad status %d from copy to DOM%d.\n",
+                                       gop->status, domid);
+                               status = NETIF_RSP_ERROR;
+                       }
+               } else {
+                       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                               mcl = npo->mcl + npo->mcl_cons++;
+                               /* The update_va_mapping() must not fail. */
+                               BUG_ON(mcl->result != 0);
+                       }
+
+                       gop = npo->trans + npo->trans_cons++;
+                       /* Check the reassignment error code. */
+                       if (gop->status != 0) {
+                               DPRINTK("Bad status %d from grant transfer to DOM%u\n",
+                                       gop->status, domid);
+                               /*
+                                * Page no longer belongs to us unless
+                                * GNTST_bad_page, but that should be
+                                * a fatal error anyway.
+                                */
+                               BUG_ON(gop->status == GNTST_bad_page);
+                               status = NETIF_RSP_ERROR;
+                       }
                }
        }
 
@@ -466,23 +507,27 @@ static void netbk_add_frag_responses(net
                                     struct netbk_rx_meta *meta, int nr_frags)
 {
        int i;
+       unsigned long offset;
 
        for (i = 0; i < nr_frags; i++) {
                int id = meta[i].id;
                int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
 
-               make_rx_response(netif, id, status, meta[i].frag.page_offset,
+               if (meta[i].copy)
+                       offset = 0;
+               else
+                       offset = meta[i].frag.page_offset;
+               make_rx_response(netif, id, status, offset,
                                 meta[i].frag.size, flags);
        }
 }
 
 static void net_rx_action(unsigned long unused)
 {
-       netif_t *netif = NULL; 
+       netif_t *netif = NULL;
        s8 status;
        u16 id, irq, flags;
        netif_rx_response_t *resp;
-       struct netif_extra_info *extra;
        multicall_entry_t *mcl;
        struct sk_buff_head rxq;
        struct sk_buff *skb;
@@ -490,6 +535,7 @@ static void net_rx_action(unsigned long 
        int ret;
        int nr_frags;
        int count;
+       unsigned long offset;
 
        /*
         * Putting hundreds of bytes on the stack is considered rude.
@@ -497,14 +543,16 @@ static void net_rx_action(unsigned long 
         */
        static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
        static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
-       static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
+       static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
+       static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
        static unsigned char rx_notify[NR_IRQS];
        static u16 notify_list[NET_RX_RING_SIZE];
        static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
 
        struct netrx_pending_operations npo = {
                mmu: rx_mmu,
-               trans: grant_rx_op,
+               trans: grant_trans_op,
+               copy: grant_copy_op,
                mcl: rx_mcl,
                meta: meta};
 
@@ -538,12 +586,8 @@ static void net_rx_action(unsigned long 
                        break;
        }
 
-       if (!count)
-               return;
-
-       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-               BUG_ON(npo.mcl_prod == 0);
-
+       if (npo.mcl_prod &&
+           !xen_feature(XENFEAT_auto_translated_physmap)) {
                mcl = npo.mcl + npo.mcl_prod++;
 
                BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
@@ -551,36 +595,63 @@ static void net_rx_action(unsigned long 
 
                mcl->op = __HYPERVISOR_mmu_update;
                mcl->args[0] = (unsigned long)rx_mmu;
-               mcl->args[1] = count;
+               mcl->args[1] = npo.mmu_prod;
                mcl->args[2] = 0;
                mcl->args[3] = DOMID_SELF;
        }
 
-       mcl = npo.mcl + npo.mcl_prod++;
-       mcl->op = __HYPERVISOR_grant_table_op;
-       mcl->args[0] = GNTTABOP_transfer;
-       mcl->args[1] = (unsigned long)grant_rx_op;
-       mcl->args[2] = npo.trans_prod;
+       if (npo.trans_prod) {
+               mcl = npo.mcl + npo.mcl_prod++;
+               mcl->op = __HYPERVISOR_grant_table_op;
+               mcl->args[0] = GNTTABOP_transfer;
+               mcl->args[1] = (unsigned long)grant_trans_op;
+               mcl->args[2] = npo.trans_prod;
+       }
+
+       if (npo.copy_prod) {
+               mcl = npo.mcl + npo.mcl_prod++;
+               mcl->op = __HYPERVISOR_grant_table_op;
+               mcl->args[0] = GNTTABOP_copy;
+               mcl->args[1] = (unsigned long)grant_copy_op;
+               mcl->args[2] = npo.copy_prod;
+       }
+
+       /* Nothing to do? */
+       if (!npo.mcl_prod)
+               return;
+
+       BUG_ON(npo.copy_prod > NET_RX_RING_SIZE);
+       BUG_ON(npo.mmu_prod > NET_RX_RING_SIZE);
+       BUG_ON(npo.trans_prod > NET_RX_RING_SIZE);
+       BUG_ON(npo.mcl_prod > NET_RX_RING_SIZE+3);
+       BUG_ON(npo.meta_prod > NET_RX_RING_SIZE);
 
        ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
        BUG_ON(ret != 0);
-       BUG_ON(mcl->result != 0);
-
-       count = 0;
+
        while ((skb = __skb_dequeue(&rxq)) != NULL) {
                nr_frags = *(int *)skb->cb;
 
-               atomic_set(&(skb_shinfo(skb)->dataref), 1);
-               skb_shinfo(skb)->nr_frags = 0;
-               skb_shinfo(skb)->frag_list = NULL;
-
                netif = netdev_priv(skb->dev);
+               /* We can't rely on skb_release_data to release the
+                  pages used by fragments for us, since it tries to
+                  touch the pages in the fraglist.  If we're in
+                  flipping mode, that doesn't work.  In copying mode,
+                  we still have access to all of the pages, and so
+                  it's safe to let release_data deal with it. */
+               /* (Freeing the fragments is safe since we copy
+                  non-linear skbs destined for flipping interfaces) */
+               if (!netif->copying_receiver) {
+                       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+                       skb_shinfo(skb)->frag_list = NULL;
+                       skb_shinfo(skb)->nr_frags = 0;
+                       netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
+               }
+
                netif->stats.tx_bytes += skb->len;
                netif->stats.tx_packets++;
 
-               netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
-               status = netbk_check_gop(nr_frags, netif->domid, count,
-                                        &npo);
+               status = netbk_check_gop(nr_frags, netif->domid, &npo);
 
                id = meta[npo.meta_cons].id;
                flags = nr_frags ? NETRXF_more_data : 0;
@@ -590,11 +661,12 @@ static void net_rx_action(unsigned long 
                else if (skb->proto_data_valid) /* remote but checksummed? */
                        flags |= NETRXF_data_validated;
 
-               resp = make_rx_response(netif, id, status,
-                                       offset_in_page(skb->data),
+               if (meta[npo.meta_cons].copy)
+                       offset = 0;
+               else
+                       offset = offset_in_page(skb->data);
+               resp = make_rx_response(netif, id, status, offset,
                                        skb_headlen(skb), flags);
-
-               extra = NULL;
 
                if (meta[npo.meta_cons].frag.size) {
                        struct netif_extra_info *gso =
@@ -602,10 +674,7 @@ static void net_rx_action(unsigned long 
                                RING_GET_RESPONSE(&netif->rx,
                                                  netif->rx.rsp_prod_pvt++);
 
-                       if (extra)
-                               extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
-                       else
-                               resp->flags |= NETRXF_extra_info;
+                       resp->flags |= NETRXF_extra_info;
 
                        gso->u.gso.size = meta[npo.meta_cons].frag.size;
                        gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
@@ -614,7 +683,6 @@ static void net_rx_action(unsigned long 
 
                        gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
                        gso->flags = 0;
-                       extra = gso;
                }
 
                netbk_add_frag_responses(netif, status,
@@ -634,7 +702,6 @@ static void net_rx_action(unsigned long 
 
                netif_put(netif);
                dev_kfree_skb(skb);
-
                npo.meta_cons += nr_frags + 1;
        }
 
@@ -1095,7 +1162,7 @@ static void net_tx_action(unsigned long 
                if (unlikely(txreq.size < ETH_HLEN)) {
                        DPRINTK("Bad packet size: %d\n", txreq.size);
                        netbk_tx_err(netif, &txreq, i);
-                       continue; 
+                       continue;
                }
 
                /* No crossing a page as the payload mustn't fragment. */
@@ -1151,6 +1218,9 @@ static void net_tx_action(unsigned long 
                        skb_shinfo(skb)->nr_frags++;
                        skb_shinfo(skb)->frags[0].page =
                                (void *)(unsigned long)pending_idx;
+               } else {
+                       /* Discriminate from any valid pending_idx value. */
+                       skb_shinfo(skb)->frags[0].page = (void *)~0UL;
                }
 
                __skb_queue_tail(&tx_queue, skb);
@@ -1251,6 +1321,12 @@ static void netif_page_release(struct pa
        set_page_count(page, 1);
 
        netif_idx_release(pending_idx);
+}
+
+static void netif_rx_page_release(struct page *page)
+{
+       /* Ready for next use. */
+       set_page_count(page, 1);
 }
 
 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
@@ -1383,6 +1459,16 @@ static int __init netback_init(void)
                SetPageForeign(page, netif_page_release);
        }
 
+       page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE);
+       BUG_ON(page == NULL);
+       rx_mmap_area = pfn_to_kaddr(page_to_pfn(page));
+
+       for (i = 0; i < NET_RX_RING_SIZE; i++) {
+               page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE));
+               set_page_count(page, 1);
+               SetPageForeign(page, netif_rx_page_release);
+       }
+
        pending_cons = 0;
        pending_prod = MAX_PENDING_REQS;
        for (i = 0; i < MAX_PENDING_REQS; i++)
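
The netback hunks above add a per-interface choice between the old page-flipping
receive path (GNTTABOP_transfer, ownership of the page moves to the guest) and a
new copying path (GNTTABOP_copy, the data is copied into a page the guest has
granted). netbk_check_gop() consumes whichever operation list was actually used,
and the response offset is forced to 0 for copied packets because the data lands
at the start of the guest's page. A minimal userspace sketch of that offset rule;
struct rx_slot and its fields are illustrative, not the driver's real structures:

/* Hypothetical sketch of the offset rule: copied packets report offset 0,
 * flipped pages keep the offset the data had in the source page. */
#include <stdio.h>

struct rx_slot {
        int copy;                  /* mirrors meta[i].copy in the patch */
        unsigned long page_offset; /* where the data sits in the source page */
};

static unsigned long response_offset(const struct rx_slot *slot)
{
        /* GNTTABOP_copy writes to the start of the guest's page, so the
         * response offset is 0; GNTTABOP_transfer hands the page over
         * as-is, so the original offset still applies. */
        return slot->copy ? 0 : slot->page_offset;
}

int main(void)
{
        struct rx_slot flipped = { .copy = 0, .page_offset = 256 };
        struct rx_slot copied  = { .copy = 1, .page_offset = 256 };

        printf("flip -> offset %lu\n", response_offset(&flipped));
        printf("copy -> offset %lu\n", response_offset(&copied));
        return 0;
}
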
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Sun Aug 20 11:08:45 2006 -0400
@@ -108,6 +108,12 @@ static int netback_probe(struct xenbus_d
                        goto abort_transaction;
                }
 
+               err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1);
+               if (err) {
+                       message = "writing feature-copying";
+                       goto abort_transaction;
+               }
+
                err = xenbus_transaction_end(xbt, 0);
        } while (err == -EAGAIN);
 
@@ -228,10 +234,25 @@ static void frontend_changed(struct xenb
 
        switch (frontend_state) {
        case XenbusStateInitialising:
+               if (dev->state == XenbusStateClosing) {
+                       printk("%s: %s: prepare for reconnect\n",
+                              __FUNCTION__, dev->nodename);
+                       if (be->netif) {
+                               netif_disconnect(be->netif);
+                               be->netif = NULL;
+                       }
+                       xenbus_switch_state(dev, XenbusStateInitWait);
+               }
+               break;
+
        case XenbusStateInitialised:
                break;
 
        case XenbusStateConnected:
+               if (!be->netif) {
+                       /* reconnect: setup be->netif */
+                       backend_changed(&be->backend_watch, NULL, 0);
+               }
                maybe_connect(be);
                break;
 
@@ -239,14 +260,13 @@ static void frontend_changed(struct xenb
                xenbus_switch_state(dev, XenbusStateClosing);
                break;
 
+       case XenbusStateUnknown:
        case XenbusStateClosed:
                if (be->netif != NULL)
                        kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
                device_unregister(&dev->dev);
                break;
 
-       case XenbusStateUnknown:
-       case XenbusStateInitWait:
        default:
                xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
                                 frontend_state);
@@ -349,7 +369,7 @@ static int connect_rings(struct backend_
 {
        struct xenbus_device *dev = be->dev;
        unsigned long tx_ring_ref, rx_ring_ref;
-       unsigned int evtchn;
+       unsigned int evtchn, rx_copy;
        int err;
        int val;
 
@@ -365,6 +385,19 @@ static int connect_rings(struct backend_
                                 dev->otherend);
                return err;
        }
+
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
+                          &rx_copy);
+       if (err == -ENOENT) {
+               err = 0;
+               rx_copy = 0;
+       }
+       if (err < 0) {
+               xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
+                                dev->otherend);
+               return err;
+       }
+       be->netif->copying_receiver = !!rx_copy;
 
        if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d",
                         &val) < 0)
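
In netback's xenbus glue the backend now advertises "feature-rx-copy" during
probe, rebuilds be->netif when a frontend reappears in XenbusStateInitialising
after a close, and reads the frontend's "request-rx-copy" key in connect_rings(),
treating a missing key (-ENOENT) as a request for the old flipping behaviour.
A rough sketch of that tolerant read; read_key() merely stands in for
xenbus_scanf() and is not a real Xen interface:

/* Sketch: a missing "request-rx-copy" key defaults to flipping rather than
 * failing the connection; any other error stays fatal. */
#include <errno.h>
#include <stdio.h>

static int read_key(int frontend_is_new, unsigned int *val)
{
        /* Pretend only new frontends publish the key at all. */
        if (!frontend_is_new)
                return -ENOENT;
        *val = 1;
        return 0;
}

static int connect_rx_mode(int frontend_is_new, int *copying_receiver)
{
        unsigned int rx_copy;
        int err = read_key(frontend_is_new, &rx_copy);

        if (err == -ENOENT) {   /* old frontend: default to page flipping */
                err = 0;
                rx_copy = 0;
        }
        if (err < 0)
                return err;     /* any other failure remains fatal */

        *copying_receiver = !!rx_copy;
        return 0;
}

int main(void)
{
        int copy;

        connect_rx_mode(1, &copy);
        printf("new frontend: copying=%d\n", copy);
        connect_rx_mode(0, &copy);
        printf("old frontend: copying=%d\n", copy);
        return 0;
}
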
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Sun Aug 20 11:08:45 2006 -0400
@@ -58,12 +58,27 @@
 #include <xen/interface/memory.h>
 #include <xen/balloon.h>
 #include <asm/page.h>
+#include <asm/maddr.h>
 #include <asm/uaccess.h>
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
 
 #define RX_COPY_THRESHOLD 256
 
+/* If we don't have GSO, fake things up so that we never try to use it. */
+#ifndef NETIF_F_GSO
+#define netif_needs_gso(dev, skb)      0
+#define dev_disable_gso_features(dev)  ((void)0)
+#else
+#define HAVE_GSO                       1
+static inline void dev_disable_gso_features(struct net_device *dev)
+{
+       /* Turn off all GSO bits except ROBUST. */
+       dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
+       dev->features |= NETIF_F_GSO_ROBUST;
+}
+#endif
+
 #define GRANT_INVALID_REF      0
 
 #define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
@@ -83,6 +98,7 @@ struct netfront_info {
 
        unsigned int handle;
        unsigned int evtchn, irq;
+       unsigned int copying_receiver;
 
        /* Receive-ring batched refills. */
 #define RX_MIN_TARGET 8
@@ -171,7 +187,7 @@ static inline grant_ref_t xennet_get_rx_
 
 static int talk_to_backend(struct xenbus_device *, struct netfront_info *);
 static int setup_device(struct xenbus_device *, struct netfront_info *);
-static struct net_device *create_netdev(int, struct xenbus_device *);
+static struct net_device *create_netdev(int, int, struct xenbus_device *);
 
 static void netfront_closing(struct xenbus_device *);
 
@@ -213,6 +229,7 @@ static int __devinit netfront_probe(stru
        struct net_device *netdev;
        struct netfront_info *info;
        unsigned int handle;
+       unsigned feature_rx_copy;
 
        err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
        if (err != 1) {
@@ -220,7 +237,22 @@ static int __devinit netfront_probe(stru
                return err;
        }
 
-       netdev = create_netdev(handle, dev);
+#ifndef CONFIG_XEN
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u",
+                          &feature_rx_copy);
+       if (err != 1) {
+               xenbus_dev_fatal(dev, err, "reading feature-rx-copy");
+               return err;
+       }
+       if (!feature_rx_copy) {
+               xenbus_dev_fatal(dev, 0, "need a copy-capable backend");
+               return -EINVAL;
+       }
+#else
+       feature_rx_copy = 0;
+#endif
+
+       netdev = create_netdev(handle, feature_rx_copy, dev);
        if (IS_ERR(netdev)) {
                err = PTR_ERR(netdev);
                xenbus_dev_fatal(dev, err, "creating netdev");
@@ -326,6 +358,13 @@ again:
                goto abort_transaction;
        }
 
+       err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
+                           info->copying_receiver);
+       if (err) {
+               message = "writing request-rx-copy";
+               goto abort_transaction;
+       }
+
        err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
        if (err) {
                message = "writing feature-rx-notify";
@@ -338,11 +377,13 @@ again:
                goto abort_transaction;
        }
 
+#ifdef HAVE_GSO
        err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
        if (err) {
                message = "writing feature-gso-tcpv4";
                goto abort_transaction;
        }
+#endif
 
        err = xenbus_transaction_end(xbt, 0);
        if (err) {
@@ -415,7 +456,8 @@ static int setup_device(struct xenbus_de
 
        memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
        err = bind_evtchn_to_irqhandler(info->evtchn, netif_int,
-                                       SA_SAMPLE_RANDOM, netdev->name, netdev);
+                                       SA_SAMPLE_RANDOM, netdev->name,
+                                       netdev);
        if (err < 0)
                goto fail;
        info->irq = err;
@@ -494,11 +536,14 @@ static int network_open(struct net_devic
 
        memset(&np->stats, 0, sizeof(np->stats));
 
-       network_alloc_rx_buffers(dev);
-       np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
-
-       if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
-               netif_rx_schedule(dev);
+       spin_lock(&np->rx_lock);
+       if (netif_carrier_ok(dev)) {
+               network_alloc_rx_buffers(dev);
+               np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
+               if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
+                       netif_rx_schedule(dev);
+       }
+       spin_unlock(&np->rx_lock);
 
        netif_start_queue(dev);
 
@@ -527,8 +572,7 @@ static void network_tx_buf_gc(struct net
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb;
 
-       if (unlikely(!netif_carrier_ok(dev)))
-               return;
+       BUG_ON(!netif_carrier_ok(dev));
 
        do {
                prod = np->tx.sring->rsp_prod;
@@ -597,6 +641,8 @@ static void network_alloc_rx_buffers(str
        grant_ref_t ref;
        unsigned long pfn;
        void *vaddr;
+       int nr_flips;
+       netif_rx_request_t *req;
 
        if (unlikely(!netif_carrier_ok(dev)))
                return;
@@ -652,7 +698,7 @@ no_skb:
                np->rx_target = np->rx_max_target;
 
  refill:
-       for (i = 0; ; i++) {
+       for (nr_flips = i = 0; ; i++) {
                if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
                        break;
 
@@ -663,7 +709,6 @@ no_skb:
                BUG_ON(np->rx_skbs[id]);
                np->rx_skbs[id] = skb;
 
-               RING_GET_REQUEST(&np->rx, req_prod + i)->id = id;
                ref = gnttab_claim_grant_reference(&np->gref_rx_head);
                BUG_ON((signed short)ref < 0);
                np->grant_rx_ref[id] = ref;
@@ -671,49 +716,68 @@ no_skb:
                pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
                vaddr = page_address(skb_shinfo(skb)->frags[0].page);
 
-               gnttab_grant_foreign_transfer_ref(ref,
-                                                 np->xbdev->otherend_id, pfn);
-               RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
-               np->rx_pfn_array[i] = pfn_to_mfn(pfn);
+               req = RING_GET_REQUEST(&np->rx, req_prod + i);
+               if (!np->copying_receiver) {
+                       gnttab_grant_foreign_transfer_ref(ref,
+                                                         np->xbdev->otherend_id,
+                                                         pfn);
+                       np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
+                       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                               /* Remove this page before passing
+                                * back to Xen. */
+                               set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+                               MULTI_update_va_mapping(np->rx_mcl+i,
+                                                       (unsigned long)vaddr,
+                                                       __pte(0), 0);
+                       }
+                       nr_flips++;
+               } else {
+                       gnttab_grant_foreign_access_ref(ref,
+                                                       np->xbdev->otherend_id,
+                                                       pfn,
+                                                       0);
+               }
+
+               req->id = id;
+               req->gref = ref;
+       }
+
+       if ( nr_flips != 0 ) {
+               /* Tell the balloon driver what is going on. */
+               balloon_update_driver_allowance(i);
+
+               set_xen_guest_handle(reservation.extent_start,
+                                    np->rx_pfn_array);
+               reservation.nr_extents   = nr_flips;
+               reservation.extent_order = 0;
+               reservation.address_bits = 0;
+               reservation.domid        = DOMID_SELF;
 
                if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                       /* Remove this page before passing back to Xen. */
-                       set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-                       MULTI_update_va_mapping(np->rx_mcl+i,
-                                               (unsigned long)vaddr,
-                                               __pte(0), 0);
+                       /* After all PTEs have been zapped, flush the TLB. */
+                       np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
+                               UVMF_TLB_FLUSH|UVMF_ALL;
+
+                       /* Give away a batch of pages. */
+                       np->rx_mcl[i].op = __HYPERVISOR_memory_op;
+                       np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+                       np->rx_mcl[i].args[1] = (unsigned long)&reservation;
+
+                       /* Zap PTEs and give away pages in one big
+                        * multicall. */
+                       (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
+
+                       /* Check return status of HYPERVISOR_memory_op(). */
+                       if (unlikely(np->rx_mcl[i].result != i))
+                               panic("Unable to reduce memory reservation\n");
+               } else {
+                       if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                                                &reservation) != i)
+                               panic("Unable to reduce memory reservation\n");
                }
-       }
-
-       /* Tell the ballon driver what is going on. */
-       balloon_update_driver_allowance(i);
-
-       set_xen_guest_handle(reservation.extent_start, np->rx_pfn_array);
-       reservation.nr_extents   = i;
-       reservation.extent_order = 0;
-       reservation.address_bits = 0;
-       reservation.domid        = DOMID_SELF;
-
-       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-               /* After all PTEs have been zapped, flush the TLB. */
-               np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
-                       UVMF_TLB_FLUSH|UVMF_ALL;
-
-               /* Give away a batch of pages. */
-               np->rx_mcl[i].op = __HYPERVISOR_memory_op;
-               np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
-               np->rx_mcl[i].args[1] = (unsigned long)&reservation;
-
-               /* Zap PTEs and give away pages in one big multicall. */
-               (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
-
-               /* Check return status of HYPERVISOR_memory_op(). */
-               if (unlikely(np->rx_mcl[i].result != i))
-                       panic("Unable to reduce memory reservation\n");
-       } else
-               if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-                                        &reservation) != i)
-                       panic("Unable to reduce memory reservation\n");
+       } else {
+               wmb();
+       }
 
        /* Above is a suitable barrier to ensure backend will see requests. */
        np->rx.req_prod_pvt = req_prod + i;
@@ -840,9 +904,12 @@ static int network_start_xmit(struct sk_
 
        if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
                tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
+#ifdef CONFIG_XEN
        if (skb->proto_data_valid) /* remote but checksummed? */
                tx->flags |= NETTXF_data_validated;
-
+#endif
+
+#ifdef HAVE_GSO
        if (skb_shinfo(skb)->gso_size) {
                struct netif_extra_info *gso = (struct netif_extra_info *)
                        RING_GET_REQUEST(&np->tx, ++i);
@@ -861,6 +928,7 @@ static int network_start_xmit(struct sk_
                gso->flags = 0;
                extra = gso;
        }
+#endif
 
        np->tx.req_prod_pvt = i + 1;
 
@@ -896,12 +964,15 @@ static irqreturn_t netif_int(int irq, vo
        unsigned long flags;
 
        spin_lock_irqsave(&np->tx_lock, flags);
-       network_tx_buf_gc(dev);
+
+       if (likely(netif_carrier_ok(dev))) {
+               network_tx_buf_gc(dev);
+               /* Under tx_lock: protects access to rx shared-ring indexes. */
+               if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
+                       netif_rx_schedule(dev);
+       }
+
        spin_unlock_irqrestore(&np->tx_lock, flags);
-
-       if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx) &&
-           likely(netif_running(dev)))
-               netif_rx_schedule(dev);
 
        return IRQ_HANDLED;
 }
@@ -947,8 +1018,10 @@ int xennet_get_extras(struct netfront_in
                                WPRINTK("Invalid extra type: %d\n",
                                        extra->type);
                        err = -EINVAL;
-               } else
-                       memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
+               } else {
+                       memcpy(&extras[extra->type - 1], extra,
+                              sizeof(*extra));
+               }
 
                skb = xennet_get_rx_skb(np, cons);
                ref = xennet_get_rx_ref(np, cons);
@@ -961,10 +1034,12 @@ int xennet_get_extras(struct netfront_in
 
 static int xennet_get_responses(struct netfront_info *np,
                                struct netfront_rx_info *rinfo, RING_IDX rp,
-                               struct sk_buff_head *list, int count)
-{
-       struct mmu_update *mmu = np->rx_mmu + count;
-       struct multicall_entry *mcl = np->rx_mcl + count;
+                               struct sk_buff_head *list,
+                               int *pages_flipped_p)
+{
+       int pages_flipped = *pages_flipped_p;
+       struct mmu_update *mmu;
+       struct multicall_entry *mcl;
        struct netif_rx_response *rx = &rinfo->rx;
        struct netif_extra_info *extras = rinfo->extras;
        RING_IDX cons = np->rx.rsp_cons;
@@ -973,6 +1048,7 @@ static int xennet_get_responses(struct n
        int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
        int frags = 1;
        int err = 0;
+       unsigned long ret;
 
        if (rx->flags & NETRXF_extra_info) {
                err = xennet_get_extras(np, extras, rp);
@@ -988,6 +1064,7 @@ static int xennet_get_responses(struct n
                                WPRINTK("rx->offset: %x, size: %u\n",
                                        rx->offset, rx->status);
                        err = -EINVAL;
+                       goto next;
                }
 
                /*
@@ -1001,35 +1078,47 @@ static int xennet_get_responses(struct n
                        goto next;
                }
 
-               /* Memory pressure, insufficient buffer headroom, ... */
-               if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) {
-                       if (net_ratelimit())
-                               WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
-                                       rx->id, rx->status);
-                       xennet_move_rx_slot(np, skb, ref);
-                       err = -ENOMEM;
-                       goto next;
+               if (!np->copying_receiver) {
+                       /* Memory pressure, insufficient buffer
+                        * headroom, ... */
+                       if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
+                               if (net_ratelimit())
+                                       WPRINTK("Unfulfilled rx req "
+                                               "(id=%d, st=%d).\n",
+                                               rx->id, rx->status);
+                               xennet_move_rx_slot(np, skb, ref);
+                               err = -ENOMEM;
+                               goto next;
+                       }
+
+                       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                               /* Remap the page. */
+                               struct page *page =
+                                       skb_shinfo(skb)->frags[0].page;
+                               unsigned long pfn = page_to_pfn(page);
+                               void *vaddr = page_address(page);
+
+                               mcl = np->rx_mcl + pages_flipped;
+                               mmu = np->rx_mmu + pages_flipped;
+
+                               MULTI_update_va_mapping(mcl,
+                                                       (unsigned long)vaddr,
+                                                       pfn_pte_ma(mfn,
+                                                                  PAGE_KERNEL),
+                                                       0);
+                               mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
+                                       | MMU_MACHPHYS_UPDATE;
+                               mmu->val = pfn;
+
+                               set_phys_to_machine(pfn, mfn);
+                       }
+                       pages_flipped++;
+               } else {
+                       ret = gnttab_end_foreign_access_ref(ref, 0);
+                       BUG_ON(!ret);
                }
 
                gnttab_release_grant_reference(&np->gref_rx_head, ref);
-
-               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                       /* Remap the page. */
-                       struct page *page = skb_shinfo(skb)->frags[0].page;
-                       unsigned long pfn = page_to_pfn(page);
-                       void *vaddr = page_address(page);
-
-                       MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
-                                               pfn_pte_ma(mfn, PAGE_KERNEL),
-                                               0);
-                       mcl++;
-                       mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
-                               | MMU_MACHPHYS_UPDATE;
-                       mmu->val = pfn;
-                       mmu++;
-
-                       set_phys_to_machine(pfn, mfn);
-               }
 
                __skb_queue_tail(list, skb);
 
@@ -1056,6 +1145,8 @@ next:
                err = -E2BIG;
        }
 
+       *pages_flipped_p = pages_flipped;
+
        return err;
 }
 
@@ -1090,7 +1181,8 @@ static RING_IDX xennet_fill_frags(struct
        return cons;
 }
 
-static int xennet_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
+static int xennet_set_skb_gso(struct sk_buff *skb,
+                             struct netif_extra_info *gso)
 {
        if (!gso->u.gso.size) {
                if (net_ratelimit())
@@ -1105,6 +1197,7 @@ static int xennet_set_skb_gso(struct sk_
                return -EINVAL;
        }
 
+#ifdef HAVE_GSO
        skb_shinfo(skb)->gso_size = gso->u.gso.size;
        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 
@@ -1113,6 +1206,11 @@ static int xennet_set_skb_gso(struct sk_
        skb_shinfo(skb)->gso_segs = 0;
 
        return 0;
+#else
+       if (net_ratelimit())
+               WPRINTK("GSO unsupported by this kernel.\n");
+       return -EINVAL;
+#endif
 }
 
 static int netif_poll(struct net_device *dev, int *pbudget)
@@ -1130,7 +1228,7 @@ static int netif_poll(struct net_device 
        struct sk_buff_head tmpq;
        unsigned long flags;
        unsigned int len;
-       int pages_done;
+       int pages_flipped = 0;
        int err;
 
        spin_lock(&np->rx_lock);
@@ -1149,14 +1247,14 @@ static int netif_poll(struct net_device 
        rp = np->rx.sring->rsp_prod;
        rmb(); /* Ensure we see queued responses up to 'rp'. */
 
-       for (i = np->rx.rsp_cons, work_done = 0, pages_done = 0;
+       for (i = np->rx.rsp_cons, work_done = 0;
             (i != rp) && (work_done < budget);
             np->rx.rsp_cons = ++i, work_done++) {
                memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
                memset(extras, 0, sizeof(extras));
 
-               err = xennet_get_responses(np, &rinfo, rp, &tmpq, pages_done);
-               pages_done += skb_queue_len(&tmpq);
+               err = xennet_get_responses(np, &rinfo, rp, &tmpq,
+                                          &pages_flipped);
 
                if (unlikely(err)) {
 err:
@@ -1200,26 +1298,26 @@ err:
 
                i = xennet_fill_frags(np, skb, &tmpq);
 
-        /*
-         * Truesize must approximates the size of true data plus
-         * any supervisor overheads. Adding hypervisor overheads
-         * has been shown to significantly reduce achievable
-         * bandwidth with the default receive buffer size. It is
-         * therefore not wise to account for it here.
-         *
-         * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
-         * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
-         * add the size of the data pulled in xennet_fill_frags().
-         *
-         * We also adjust for any unused space in the main data
-         * area by subtracting (RX_COPY_THRESHOLD - len). This is
-         * especially important with drivers which split incoming
-         * packets into header and data, using only 66 bytes of
-         * the main data area (see the e1000 driver for example.)
-         * On such systems, without this last adjustement, our
-         * achievable receive throughout using the standard receive
-         * buffer size was cut by 25%(!!!).
-         */
+               /*
+                * Truesize must approximates the size of true data plus
+                * any supervisor overheads. Adding hypervisor overheads
+                * has been shown to significantly reduce achievable
+                * bandwidth with the default receive buffer size. It is
+                * therefore not wise to account for it here.
+                *
+                * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
+                * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
+                * add the size of the data pulled in xennet_fill_frags().
+                *
+                * We also adjust for any unused space in the main data
+                * area by subtracting (RX_COPY_THRESHOLD - len). This is
+                * especially important with drivers which split incoming
+                * packets into header and data, using only 66 bytes of
+                * the main data area (see the e1000 driver for example.)
+                * On such systems, without this last adjustement, our
+                * achievable receive throughout using the standard receive
+                * buffer size was cut by 25%(!!!).
+                */
                skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
                skb->len += skb->data_len;
 
@@ -1227,33 +1325,35 @@ err:
                 * Old backends do not assert data_validated but we
                 * can infer it from csum_blank so test both flags.
                 */
-               if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) {
+               if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank))
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
-                       skb->proto_data_valid = 1;
-               } else {
+               else
                        skb->ip_summed = CHECKSUM_NONE;
-                       skb->proto_data_valid = 0;
-               }
+#ifdef CONFIG_XEN
+               skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE);
                skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
-
+#endif
                np->stats.rx_packets++;
                np->stats.rx_bytes += skb->len;
 
                __skb_queue_tail(&rxq, skb);
        }
 
-       /* Some pages are no longer absent... */
-       balloon_update_driver_allowance(-pages_done);
-
-       /* Do all the remapping work, and M2P updates, in one big hypercall. */
-       if (likely(pages_done)) {
-               mcl = np->rx_mcl + pages_done;
-               mcl->op = __HYPERVISOR_mmu_update;
-               mcl->args[0] = (unsigned long)np->rx_mmu;
-               mcl->args[1] = pages_done;
-               mcl->args[2] = 0;
-               mcl->args[3] = DOMID_SELF;
-               (void)HYPERVISOR_multicall(np->rx_mcl, pages_done + 1);
+       if (pages_flipped) {
+               /* Some pages are no longer absent... */
+               balloon_update_driver_allowance(-pages_flipped);
+
+               /* Do all the remapping work and M2P updates. */
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       mcl = np->rx_mcl + pages_flipped;
+                       mcl->op = __HYPERVISOR_mmu_update;
+                       mcl->args[0] = (unsigned long)np->rx_mmu;
+                       mcl->args[1] = pages_flipped;
+                       mcl->args[2] = 0;
+                       mcl->args[3] = DOMID_SELF;
+                       (void)HYPERVISOR_multicall(np->rx_mcl,
+                                                  pages_flipped + 1);
+               }
        }
 
        while ((skb = __skb_dequeue(&errq)))
@@ -1304,97 +1404,12 @@ err:
        return more_to_do;
 }
 
-
-static int network_close(struct net_device *dev)
-{
-       struct netfront_info *np = netdev_priv(dev);
-       netif_stop_queue(np->netdev);
-       return 0;
-}
-
-
-static struct net_device_stats *network_get_stats(struct net_device *dev)
-{
-       struct netfront_info *np = netdev_priv(dev);
-       return &np->stats;
-}
-
-static int xennet_change_mtu(struct net_device *dev, int mtu)
-{
-       int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
-
-       if (mtu > max)
-               return -EINVAL;
-       dev->mtu = mtu;
-       return 0;
-}
-
-static int xennet_set_sg(struct net_device *dev, u32 data)
-{
-       if (data) {
-               struct netfront_info *np = netdev_priv(dev);
-               int val;
-
-               if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
-                                "%d", &val) < 0)
-                       val = 0;
-               if (!val)
-                       return -ENOSYS;
-       } else if (dev->mtu > ETH_DATA_LEN)
-               dev->mtu = ETH_DATA_LEN;
-
-       return ethtool_op_set_sg(dev, data);
-}
-
-static int xennet_set_tso(struct net_device *dev, u32 data)
-{
-       if (data) {
-               struct netfront_info *np = netdev_priv(dev);
-               int val;
-
-               if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
-                                "feature-gso-tcpv4", "%d", &val) < 0)
-                       val = 0;
-               if (!val)
-                       return -ENOSYS;
-       }
-
-       return ethtool_op_set_tso(dev, data);
-}
-
-static void xennet_set_features(struct net_device *dev)
-{
-       /* Turn off all GSO bits except ROBUST. */
-       dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
-       dev->features |= NETIF_F_GSO_ROBUST;
-       xennet_set_sg(dev, 0);
-
-       if (!xennet_set_sg(dev, 1))
-               xennet_set_tso(dev, 1);
-}
-
-static void network_connect(struct net_device *dev)
-{
-       struct netfront_info *np = netdev_priv(dev);
-       int i, requeue_idx;
+static void netif_release_tx_bufs(struct netfront_info *np)
+{
        struct sk_buff *skb;
-       grant_ref_t ref;
-
-       xennet_set_features(dev);
-
-       spin_lock_irq(&np->tx_lock);
-       spin_lock(&np->rx_lock);
-
-       /*
-         * Recovery procedure:
-        *  NB. Freelist index entries are always going to be less than
-        *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
-        *  greater than PAGE_OFFSET: we use this property to distinguish
-        *  them.
-         */
-
-       /* Step 1: Discard all pending TX packet fragments. */
-       for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
+       int i;
+
+       for (i = 1; i <= NET_TX_RING_SIZE; i++) {
                if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
                        continue;
 
@@ -1407,6 +1422,191 @@ static void network_connect(struct net_d
                add_id_to_freelist(np->tx_skbs, i);
                dev_kfree_skb_irq(skb);
        }
+}
+
+static void netif_release_rx_bufs(struct netfront_info *np)
+{
+       struct mmu_update      *mmu = np->rx_mmu;
+       struct multicall_entry *mcl = np->rx_mcl;
+       struct sk_buff_head free_list;
+       struct sk_buff *skb;
+       unsigned long mfn;
+       int xfer = 0, noxfer = 0, unused = 0;
+       int id, ref;
+
+       if (np->copying_receiver) {
+               printk("%s: fix me for copying receiver.\n", __FUNCTION__);
+               return;
+       }
+
+       skb_queue_head_init(&free_list);
+
+       spin_lock(&np->rx_lock);
+
+       for (id = 0; id < NET_RX_RING_SIZE; id++) {
+               if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
+                       unused++;
+                       continue;
+               }
+
+               skb = np->rx_skbs[id];
+               mfn = gnttab_end_foreign_transfer_ref(ref);
+               gnttab_release_grant_reference(&np->gref_rx_head, ref);
+               np->grant_rx_ref[id] = GRANT_INVALID_REF;
+               add_id_to_freelist(np->rx_skbs, id);
+
+               if (0 == mfn) {
+                       struct page *page = skb_shinfo(skb)->frags[0].page;
+                       balloon_release_driver_page(page);
+                       skb_shinfo(skb)->nr_frags = 0;
+                       dev_kfree_skb(skb);
+                       noxfer++;
+                       continue;
+               }
+
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* Remap the page. */
+                       struct page *page = skb_shinfo(skb)->frags[0].page;
+                       unsigned long pfn = page_to_pfn(page);
+                       void *vaddr = page_address(page);
+
+                       MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
+                                               pfn_pte_ma(mfn, PAGE_KERNEL),
+                                               0);
+                       mcl++;
+                       mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
+                               | MMU_MACHPHYS_UPDATE;
+                       mmu->val = pfn;
+                       mmu++;
+
+                       set_phys_to_machine(pfn, mfn);
+               }
+               __skb_queue_tail(&free_list, skb);
+               xfer++;
+       }
+
+       printk("%s: %d xfer, %d noxfer, %d unused\n",
+              __FUNCTION__, xfer, noxfer, unused);
+
+       if (xfer) {
+               /* Some pages are no longer absent... */
+               balloon_update_driver_allowance(-xfer);
+
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* Do all the remapping work and M2P updates. */
+                       mcl->op = __HYPERVISOR_mmu_update;
+                       mcl->args[0] = (unsigned long)np->rx_mmu;
+                       mcl->args[1] = mmu - np->rx_mmu;
+                       mcl->args[2] = 0;
+                       mcl->args[3] = DOMID_SELF;
+                       mcl++;
+                       HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
+               }
+       }
+
+       while ((skb = __skb_dequeue(&free_list)) != NULL)
+               dev_kfree_skb(skb);
+
+       spin_unlock(&np->rx_lock);
+}
+
+static int network_close(struct net_device *dev)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       netif_stop_queue(np->netdev);
+       return 0;
+}
+
+
+static struct net_device_stats *network_get_stats(struct net_device *dev)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       return &np->stats;
+}
+
+static int xennet_change_mtu(struct net_device *dev, int mtu)
+{
+       int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+       if (mtu > max)
+               return -EINVAL;
+       dev->mtu = mtu;
+       return 0;
+}
+
+static int xennet_set_sg(struct net_device *dev, u32 data)
+{
+       if (data) {
+               struct netfront_info *np = netdev_priv(dev);
+               int val;
+
+               if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
+                                "%d", &val) < 0)
+                       val = 0;
+               if (!val)
+                       return -ENOSYS;
+       } else if (dev->mtu > ETH_DATA_LEN)
+               dev->mtu = ETH_DATA_LEN;
+
+       return ethtool_op_set_sg(dev, data);
+}
+
+static int xennet_set_tso(struct net_device *dev, u32 data)
+{
+#ifdef HAVE_GSO
+       if (data) {
+               struct netfront_info *np = netdev_priv(dev);
+               int val;
+
+               if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+                                "feature-gso-tcpv4", "%d", &val) < 0)
+                       val = 0;
+               if (!val)
+                       return -ENOSYS;
+       }
+
+       return ethtool_op_set_tso(dev, data);
+#else
+       return -ENOSYS;
+#endif
+}
+
+static void xennet_set_features(struct net_device *dev)
+{
+       dev_disable_gso_features(dev);
+       xennet_set_sg(dev, 0);
+
+       /* We need checksum offload to enable scatter/gather and TSO. */
+       if (!(dev->features & NETIF_F_ALL_CSUM))
+               return;
+
+       if (!xennet_set_sg(dev, 1))
+               xennet_set_tso(dev, 1);
+}
+
+static void network_connect(struct net_device *dev)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       int i, requeue_idx;
+       struct sk_buff *skb;
+       grant_ref_t ref;
+       netif_rx_request_t *req;
+
+       xennet_set_features(dev);
+
+       spin_lock_irq(&np->tx_lock);
+       spin_lock(&np->rx_lock);
+
+       /*
+        * Recovery procedure:
+        *  NB. Freelist index entries are always going to be less than
+        *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
+        *  greater than PAGE_OFFSET: we use this property to distinguish
+        *  them.
+        */
+
+       /* Step 1: Discard all pending TX packet fragments. */
+       netif_release_tx_bufs(np);
 
        /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
        for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
@@ -1415,13 +1615,20 @@ static void network_connect(struct net_d
 
                skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
                ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
-
-               gnttab_grant_foreign_transfer_ref(
-                       ref, np->xbdev->otherend_id,
-                       page_to_pfn(skb_shinfo(skb)->frags->page));
-
-               RING_GET_REQUEST(&np->rx, requeue_idx)->gref = ref;
-               RING_GET_REQUEST(&np->rx, requeue_idx)->id   = requeue_idx;
+               req = RING_GET_REQUEST(&np->rx, requeue_idx);
+
+               if (!np->copying_receiver) {
+                       gnttab_grant_foreign_transfer_ref(
+                               ref, np->xbdev->otherend_id,
+                               page_to_pfn(skb_shinfo(skb)->frags->page));
+               } else {
+                       gnttab_grant_foreign_access_ref(
+                               ref, np->xbdev->otherend_id,
+                               page_to_pfn(skb_shinfo(skb)->frags->page),
+                               0);
+               }
+               req->gref = ref;
+               req->id   = requeue_idx;
 
                requeue_idx++;
        }
@@ -1446,6 +1653,8 @@ static void netif_uninit(struct net_devi
 static void netif_uninit(struct net_device *dev)
 {
        struct netfront_info *np = netdev_priv(dev);
+       netif_release_tx_bufs(np);
+       netif_release_rx_bufs(np);
        gnttab_free_grant_references(np->gref_tx_head);
        gnttab_free_grant_references(np->gref_rx_head);
 }
@@ -1604,13 +1813,8 @@ static void network_set_multicast_list(s
 {
 }
 
-/** Create a network device.
- * @param handle device handle
- * @param val return parameter for created device
- * @return 0 on success, error code otherwise
- */
-static struct net_device * __devinit create_netdev(int handle,
-                                                  struct xenbus_device *dev)
+static struct net_device * __devinit
+create_netdev(int handle, int copying_receiver, struct xenbus_device *dev)
 {
        int i, err = 0;
        struct net_device *netdev = NULL;
@@ -1623,9 +1827,10 @@ static struct net_device * __devinit cre
                return ERR_PTR(-ENOMEM);
        }
 
-       np                = netdev_priv(netdev);
-       np->handle        = handle;
-       np->xbdev         = dev;
+       np                   = netdev_priv(netdev);
+       np->handle           = handle;
+       np->xbdev            = dev;
+       np->copying_receiver = copying_receiver;
 
        netif_carrier_off(netdev);
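
On the frontend side, network_alloc_rx_buffers() now distinguishes the two modes
per buffer: a copying receiver grants the backend access to the page and keeps it
mapped, while a flipping receiver grants a transfer, zaps the PTE and counts the
page in nr_flips. Only flipped pages are handed back to Xen, so the
decrease_reservation hypercall and the balloon accounting use nr_flips rather
than the total number of queued buffers. A hedged sketch of that accounting,
with the hypercalls reduced to a printf:

/* Sketch of the refill accounting above; names are illustrative only. */
#include <stdio.h>

static int refill_ring(int nbufs, int copying_receiver)
{
        int i, nr_flips = 0;

        for (i = 0; i < nbufs; i++) {
                if (!copying_receiver)
                        nr_flips++;  /* page will be transferred to the backend */
                /* copy mode: page stays mapped, nothing to give back */
        }

        if (nr_flips != 0)
                /* decrease_reservation(nr_flips) + balloon accounting go here */
                printf("giving %d of %d pages back to Xen\n", nr_flips, nbufs);
        else
                printf("copy mode: keeping all %d pages\n", nbufs);

        return nr_flips;
}

int main(void)
{
        refill_ring(8, 0);   /* flipping frontend */
        refill_ring(8, 1);   /* copying frontend */
        return 0;
}
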
 
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Sun Aug 20 11:08:45 2006 -0400
@@ -181,6 +181,7 @@ static void pciback_frontend_changed(str
                xenbus_switch_state(xdev, XenbusStateClosing);
                break;
 
+       case XenbusStateUnknown:
        case XenbusStateClosed:
                dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
                device_unregister(&xdev->dev);
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c        Sun Aug 20 11:08:45 2006 -0400
@@ -225,6 +225,7 @@ static void pcifront_backend_changed(str
                pcifront_try_disconnect(pdev);
                break;
 
+       case XenbusStateUnknown:
        case XenbusStateClosed:
                dev_warn(&xdev->dev, "backend went away!\n");
                pcifront_try_disconnect(pdev);
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Sun Aug 20 11:08:45 2006 -0400
@@ -108,13 +108,15 @@ static int privcmd_ioctl(struct inode *i
        }
        break;
 
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
        case IOCTL_PRIVCMD_MMAP: {
 #define PRIVCMD_MMAP_SZ 32
                privcmd_mmap_t mmapcmd;
                privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ];
                privcmd_mmap_entry_t __user *p;
                int i, rc;
+
+               if (!is_initial_xendomain())
+                       return -EPERM;
 
                if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                        return -EFAULT;
@@ -162,8 +164,11 @@ static int privcmd_ioctl(struct inode *i
                privcmd_mmapbatch_t m;
                struct vm_area_struct *vma = NULL;
                xen_pfn_t __user *p;
-               unsigned long addr, mfn; 
+               unsigned long addr, mfn;
                int i;
+
+               if (!is_initial_xendomain())
+                       return -EPERM;
 
                if (copy_from_user(&m, udata, sizeof(m))) {
                        ret = -EFAULT;
@@ -215,7 +220,6 @@ static int privcmd_ioctl(struct inode *i
                break;
        }
        break;
-#endif
 
        default:
                ret = -EINVAL;
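
The privcmd hunks replace the compile-time CONFIG_XEN_PRIVILEGED_GUEST guard
around IOCTL_PRIVCMD_MMAP and IOCTL_PRIVCMD_MMAPBATCH with a runtime
is_initial_xendomain() test, so an unprivileged domain now gets -EPERM instead
of the ioctls simply being compiled out. A small sketch of the pattern;
initial_xendomain is just a variable standing in for the real macro:

/* Sketch: runtime privilege check instead of an #ifdef'd-out code path. */
#include <errno.h>
#include <stdio.h>

static int initial_xendomain = 0;   /* stand-in for is_initial_xendomain() */

static int privcmd_mmap(void)
{
        if (!initial_xendomain)
                return -EPERM;      /* refuse at run time */
        /* ... set up the foreign mapping ... */
        return 0;
}

int main(void)
{
        printf("domU: %d\n", privcmd_mmap());   /* -EPERM */
        initial_xendomain = 1;
        printf("dom0: %d\n", privcmd_mmap());   /* 0 */
        return 0;
}
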
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Sun Aug 20 11:08:45 2006 -0400
@@ -34,7 +34,6 @@ struct backend_info
 
        /* watch front end for changes */
        struct xenbus_watch backend_watch;
-       enum xenbus_state frontend_state;
 };
 
 static void maybe_connect(struct backend_info *be);
@@ -143,8 +142,6 @@ static void frontend_changed(struct xenb
        struct backend_info *be = dev->dev.driver_data;
        int err;
 
-       be->frontend_state = frontend_state;
-
        switch (frontend_state) {
        case XenbusStateInitialising:
        case XenbusStateInitialised:
@@ -162,13 +159,12 @@ static void frontend_changed(struct xenb
                be->instance = -1;
                break;
 
+       case XenbusStateUnknown:
        case XenbusStateClosed:
                device_unregister(&be->dev->dev);
                tpmback_remove(dev);
                break;
 
-       case XenbusStateUnknown:
-       case XenbusStateInitWait:
        default:
                xenbus_dev_fatal(dev, -EINVAL,
                                 "saw state %d at frontend",
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Sun Aug 20 11:08:45 2006 -0400
@@ -274,7 +274,7 @@ enum xenbus_state xenbus_read_driver_sta
        enum xenbus_state result;
        int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
        if (err)
-               result = XenbusStateClosed;
+               result = XenbusStateUnknown;
 
        return result;
 }
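
xenbus_read_driver_state() now reports XenbusStateUnknown, rather than pretending
the peer is Closed, when the state node cannot be read. The backend state
machines touched above (netback, pciback, pcifront, tpmback) add
XenbusStateUnknown as a fall-through to the Closed case, so a frontend whose
xenstore directory has vanished still triggers device teardown. A compact sketch
of that behaviour; the enum and helpers are simplified stand-ins for the real
xenbus calls:

/* Sketch: a missing state node reads as Unknown, and Unknown is handled
 * like Closed by the backend state machine. */
#include <stdio.h>

enum xenbus_state { Unknown, Initialising, Connected, Closing, Closed };

static enum xenbus_state read_state(int node_exists, enum xenbus_state v)
{
        return node_exists ? v : Unknown;   /* previously: Closed on any error */
}

static const char *react(enum xenbus_state s)
{
        switch (s) {
        case Closing:
                return "switch to Closing";
        case Unknown:           /* fall through: peer directory is gone */
        case Closed:
                return "unregister device";
        default:
                return "ignore";
        }
}

int main(void)
{
        printf("node present, Closing -> %s\n", react(read_state(1, Closing)));
        printf("node missing          -> %s\n", react(read_state(0, Closing)));
        return 0;
}
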
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Sun Aug 20 11:08:45 2006 -0400
@@ -346,7 +346,7 @@ static struct file_operations xenbus_dev
        .poll = xenbus_dev_poll,
 };
 
-static int __init
+int __init
 xenbus_dev_init(void)
 {
        xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400);
@@ -355,5 +355,3 @@ xenbus_dev_init(void)
 
        return 0;
 }
-
-__initcall(xenbus_dev_init);
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Sun Aug 20 11:08:45 2006 -0400
@@ -45,12 +45,14 @@
 
 #include <asm/io.h>
 #include <asm/page.h>
+#include <asm/maddr.h>
 #include <asm/pgtable.h>
 #include <asm/hypervisor.h>
 #include <xen/xenbus.h>
 #include <xen/xen_proc.h>
 #include <xen/evtchn.h>
 #include <xen/features.h>
+#include <xen/hvm.h>
 
 #include "xenbus_comms.h"
 
@@ -63,6 +65,14 @@ static struct notifier_block *xenstore_c
 static struct notifier_block *xenstore_chain;
 
 static void wait_for_devices(struct xenbus_driver *xendrv);
+
+static int xenbus_probe_frontend(const char *type, const char *name);
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+                                int num_envp, char *buffer, int buffer_size);
+static int xenbus_probe_backend(const char *type, const char *domid);
+
+static int xenbus_dev_probe(struct device *_dev);
+static int xenbus_dev_remove(struct device *_dev);
 
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
@@ -172,15 +182,16 @@ static int read_frontend_details(struct 
 
 
 /* Bus type for frontend drivers. */
-static int xenbus_probe_frontend(const char *type, const char *name);
 static struct xen_bus_type xenbus_frontend = {
        .root = "device",
        .levels = 2,            /* device/type/<id> */
        .get_bus_id = frontend_bus_id,
        .probe = xenbus_probe_frontend,
        .bus = {
-               .name  = "xen",
-               .match = xenbus_match,
+               .name     = "xen",
+               .match    = xenbus_match,
+               .probe    = xenbus_dev_probe,
+               .remove   = xenbus_dev_remove,
        },
        .dev = {
                .bus_id = "xen",
@@ -225,18 +236,17 @@ static int backend_bus_id(char bus_id[BU
        return 0;
 }
 
-static int xenbus_uevent_backend(struct device *dev, char **envp,
-                                int num_envp, char *buffer, int buffer_size);
-static int xenbus_probe_backend(const char *type, const char *domid);
 static struct xen_bus_type xenbus_backend = {
        .root = "backend",
        .levels = 3,            /* backend/type/<frontend>/<id> */
        .get_bus_id = backend_bus_id,
        .probe = xenbus_probe_backend,
        .bus = {
-               .name  = "xen-backend",
-               .match = xenbus_match,
-               .uevent = xenbus_uevent_backend,
+               .name     = "xen-backend",
+               .match    = xenbus_match,
+               .probe    = xenbus_dev_probe,
+               .remove   = xenbus_dev_remove,
+               .uevent   = xenbus_uevent_backend,
        },
        .dev = {
                .bus_id = "xen-backend",
@@ -403,8 +413,6 @@ static int xenbus_register_driver_common
        drv->driver.name = drv->name;
        drv->driver.bus = &bus->bus;
        drv->driver.owner = drv->owner;
-       drv->driver.probe = xenbus_dev_probe;
-       drv->driver.remove = xenbus_dev_remove;
 
        mutex_lock(&xenwatch_mutex);
        ret = driver_register(&drv->driver);
@@ -844,7 +852,7 @@ static int resume_dev(struct device *dev
                        printk(KERN_WARNING
                               "xenbus: resume %s failed: %i\n", 
                               dev->bus_id, err);
-                       return err; 
+                       return err;
                }
        }
 
@@ -856,7 +864,7 @@ static int resume_dev(struct device *dev
                return err;
        }
 
-       return 0; 
+       return 0;
 }
 
 void xenbus_suspend(void)
@@ -962,7 +970,7 @@ static int xsd_port_read(char *page, cha
 
 static int __init xenbus_probe_init(void)
 {
-       int err = 0, dom0;
+       int err = 0;
        unsigned long page = 0;
 
        DPRINTK("");
@@ -977,9 +985,7 @@ static int __init xenbus_probe_init(void
        /*
         * Domain0 doesn't have a store_evtchn or store_mfn yet.
         */
-       dom0 = (xen_start_info->store_evtchn == 0);
-
-       if (dom0) {
+       if (is_initial_xendomain()) {
                struct evtchn_alloc_unbound alloc_unbound;
 
                /* Allocate page. */
@@ -1017,13 +1023,23 @@ static int __init xenbus_probe_init(void
                if (xsd_port_intf)
                        xsd_port_intf->read_proc = xsd_port_read;
 #endif
+               xen_store_interface = mfn_to_virt(xen_store_mfn);
        } else {
                xenstored_ready = 1;
+#ifdef CONFIG_XEN
                xen_store_evtchn = xen_start_info->store_evtchn;
                xen_store_mfn = xen_start_info->store_mfn;
-       }
-
-       xen_store_interface = mfn_to_virt(xen_store_mfn);
+               xen_store_interface = mfn_to_virt(xen_store_mfn);
+#else
+               xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
+               xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
+               xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
+                                             PAGE_SIZE);
+#endif
+       }
+
+
+       xenbus_dev_init();
 
        /* Initialize the interface to xenstore. */
        err = xs_init();
@@ -1037,7 +1053,7 @@ static int __init xenbus_probe_init(void
        device_register(&xenbus_frontend.dev);
        device_register(&xenbus_backend.dev);
 
-       if (!dom0)
+       if (!is_initial_xendomain())
                xenbus_probe(NULL);
 
        return 0;
@@ -1056,6 +1072,8 @@ static int __init xenbus_probe_init(void
 }
 
 postcore_initcall(xenbus_probe_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
 
 
 static int is_disconnected_device(struct device *dev, void *data)
@@ -1140,6 +1158,7 @@ static void wait_for_devices(struct xenb
                         print_device_status);
 }
 
+#ifndef MODULE
 static int __init boot_wait_for_devices(void)
 {
        ready_to_wait_for_devices = 1;
@@ -1148,3 +1167,4 @@ static int __init boot_wait_for_devices(
 }
 
 late_initcall(boot_wait_for_devices);
+#endif
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Sun Aug 20 11:08:45 2006 -0400
@@ -665,7 +665,17 @@ EXPORT_SYMBOL_GPL(unregister_xenbus_watc
 
 void xs_suspend(void)
 {
+       struct xenbus_watch *watch;
+       char token[sizeof(watch) * 2 + 1];
+
        down_write(&xs_state.suspend_mutex);
+
+       /* No need for watches_lock: the suspend_mutex is sufficient. */
+       list_for_each_entry(watch, &watches, list) {
+               sprintf(token, "%lX", (long)watch);
+               xs_unwatch(watch->node, token);
+       }
+
        mutex_lock(&xs_state.request_mutex);
 }
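
With this change xs_suspend() explicitly unregisters every watch before the transport goes away, using the watch pointer (printed as hex) as the xenstored token. Presumably the matching resume path re-registers the same watches with the same pointer-derived tokens once xenstored is reachable again; a minimal sketch under that assumption (xs_watch(), the watches list and xs_state are the existing xenbus_xs.c internals):

    void xs_resume(void)
    {
            struct xenbus_watch *watch;
            char token[sizeof(watch) * 2 + 1];

            mutex_unlock(&xs_state.request_mutex);

            /* Re-register each watch under the same token that
             * xs_suspend() used when it unregistered it. */
            list_for_each_entry(watch, &watches, list) {
                    sprintf(token, "%lX", (long)watch);
                    xs_watch(watch->node, token);
            }

            up_write(&xs_state.suspend_mutex);
    }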
 
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h   Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h   Sun Aug 20 11:08:45 2006 -0400
@@ -58,7 +58,11 @@ extern shared_info_t *HYPERVISOR_shared_
 
 /* arch/xen/i386/kernel/setup.c */
 extern start_info_t *xen_start_info;
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 #define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
+#else
+#define is_initial_xendomain() 0
+#endif
 
 /* arch/xen/kernel/evtchn.c */
 /* Force a proper event-channel callback from Xen. */
@@ -199,6 +203,16 @@ MULTI_update_va_mapping(
 }
 
 static inline void
+MULTI_grant_table_op(multicall_entry_t *mcl, unsigned int cmd,
+                    void *uop, unsigned int count)
+{
+    mcl->op = __HYPERVISOR_grant_table_op;
+    mcl->args[0] = cmd;
+    mcl->args[1] = (unsigned long)uop;
+    mcl->args[2] = count;
+}
+
+static inline void
 MULTI_update_va_mapping_otherdomain(
     multicall_entry_t *mcl, unsigned long va,
     pte_t new_val, unsigned long flags, domid_t domid)
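
MULTI_grant_table_op() only fills in one multicall slot; the caller still batches it with other entries and issues the hypercall itself. A hedged sketch of the intended usage pattern (the helper name and surrounding driver context are illustrative; GNTTABOP_unmap_grant_ref, struct gnttab_unmap_grant_ref and HYPERVISOR_multicall() are the existing Xen interfaces):

    /* Tear down a mapping and return the grant in a single hypercall. */
    static void example_unmap_grant(unsigned long vaddr,
                                    struct gnttab_unmap_grant_ref *unmap_op)
    {
            multicall_entry_t mcl[2];

            /* Slot 0: clear the PTE covering the granted page. */
            MULTI_update_va_mapping(&mcl[0], vaddr, __pte(0), 0);
            /* Slot 1: hand the grant reference back to the granter. */
            MULTI_grant_table_op(&mcl[1], GNTTABOP_unmap_grant_ref,
                                 unmap_op, 1);

            if (unlikely(HYPERVISOR_multicall(mcl, 2) != 0))
                    BUG();
    }
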
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/include/xen/balloon.h
--- a/linux-2.6-xen-sparse/include/xen/balloon.h        Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/include/xen/balloon.h        Sun Aug 20 11:08:45 2006 -0400
@@ -52,6 +52,8 @@ balloon_dealloc_empty_page_range(
 balloon_dealloc_empty_page_range(
        struct page *page, unsigned long nr_pages);
 
+void balloon_release_driver_page(struct page *page);
+
 /*
  * Prevent the balloon driver from changing the memory reservation during
  * a driver critical region.
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/include/xen/hvm.h
--- a/linux-2.6-xen-sparse/include/xen/hvm.h    Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/include/xen/hvm.h    Sun Aug 20 11:08:45 2006 -0400
@@ -8,10 +8,17 @@ static inline unsigned long hvm_get_para
 static inline unsigned long hvm_get_parameter(int idx)
 {
        struct xen_hvm_param xhv;
+       int r;
 
        xhv.domid = DOMID_SELF;
        xhv.index = idx;
-       return HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
+       r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
+       if (r < 0) {
+               printk(KERN_ERR "cannot get hvm parameter %d: %d.\n",
+                      idx, r);
+               return 0;
+       }
+       return xhv.value;
 }
 
 #endif /* XEN_HVM_H__ */
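
With this change hvm_get_parameter() returns the parameter's value (and 0 if the hypercall fails) rather than the raw hypercall return code, which is what the new HVM branch of xenbus_probe_init() above relies on. A brief usage sketch along the lines of that call site (the error handling shown here is illustrative):

    unsigned long evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);

    if (evtchn == 0)
            /* either the hypercall failed or the parameter is unset */
            return -ENODEV;
    xen_store_evtchn = evtchn;
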
diff -r 96d6f9cfed6e -r 4cffec02b478 linux-2.6-xen-sparse/include/xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h Sun Aug 20 11:07:52 2006 -0400
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Sun Aug 20 11:08:45 2006 -0400
@@ -274,7 +274,7 @@ int xenbus_free_evtchn(struct xenbus_dev
 
 /**
  * Return the state of the driver rooted at the given store path, or
- * XenbusStateClosed if no state can be read.
+ * XenbusStateUnknown if no state can be read.
  */
 enum xenbus_state xenbus_read_driver_state(const char *path);
 
@@ -295,5 +295,6 @@ void xenbus_dev_fatal(struct xenbus_devi
 void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
                      ...);
 
+int __init xenbus_dev_init(void);
 
 #endif /* _XEN_XENBUS_H */
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/blktap/lib/Makefile
--- a/tools/blktap/lib/Makefile Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/blktap/lib/Makefile Sun Aug 20 11:08:45 2006 -0400
@@ -61,7 +61,7 @@ libblktap.a: $(OBJS)
              -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
        ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
        ln -sf libblktap.so.$(MAJOR) libblktap.so
-       ar rc $@ libblktap.so
+       $(AR) rc $@ libblktap.so
 
 .PHONY: TAGS all build clean install libblktap
 
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm      Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/examples/xmexample.hvm      Sun Aug 20 11:08:45 2006 -0400
@@ -26,6 +26,10 @@ builder='hvm'
 #          memory errors. The domain needs enough memory to boot kernel
 #          and modules. Allocating less than 32MBs is not recommended.
 memory = 128
+
+# Shadow pagetable memory for the domain, in MB.
+# Should be at least 2KB per MB of domain memory, plus a few MB per vcpu.
+shadow_memory = 8
 
 # A name for your domain. All domains must have different names.
 name = "ExampleHVMDomain"
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/firmware/rombios/apmbios.S
--- a/tools/firmware/rombios/apmbios.S  Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/firmware/rombios/apmbios.S  Sun Aug 20 11:08:45 2006 -0400
@@ -225,7 +225,10 @@ APMSYM(05):
 APMSYM(05):
   cmp al, #0x05
   jne APMSYM(07)
+  pushf ; XEN
+  sti   ; XEN: OS calls us with ints disabled -- better re-enable here!
   hlt
+  popf  ; XEN
   jmp APMSYM(ok)
 
 ;-----------------
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/firmware/rombios/rombios.c  Sun Aug 20 11:08:45 2006 -0400
@@ -9459,7 +9459,7 @@ smbios_init:
   mov cx, #0x001f ; 0x1f bytes to copy
   mov ax, #0xf000
   mov es, ax      ; destination segment is 0xf0000
-  mov di, smbios_entry_point ; destination offset
+  mov di, #smbios_entry_point ; destination offset
   mov ax, #0x9f00
   mov ds, ax      ; source segment is 0x9f000
   mov si, #0x0000 ; source offset is 0
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/Makefile.target
--- a/tools/ioemu/Makefile.target       Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/Makefile.target       Sun Aug 20 11:08:45 2006 -0400
@@ -359,6 +359,7 @@ VL_OBJS+= usb-uhci.o
 VL_OBJS+= usb-uhci.o
 VL_OBJS+= piix4acpi.o
 VL_OBJS+= xenstore.o
+VL_OBJS+= xen_platform.o
 DEFINES += -DHAS_AUDIO
 endif
 ifeq ($(TARGET_BASE_ARCH), ppc)
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/hw/pc.c
--- a/tools/ioemu/hw/pc.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/hw/pc.c       Sun Aug 20 11:08:45 2006 -0400
@@ -823,6 +823,9 @@ static void pc_init1(uint64_t ram_size, 
     }
 #endif /* !CONFIG_DM */
 
+    if (pci_enabled)
+        pci_xen_platform_init(pci_bus);
+
     for(i = 0; i < MAX_SERIAL_PORTS; i++) {
         if (serial_hds[i]) {
             serial_init(&pic_set_irq_new, isa_pic,
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/hw/piix_pci.c
--- a/tools/ioemu/hw/piix_pci.c Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/hw/piix_pci.c Sun Aug 20 11:08:45 2006 -0400
@@ -415,7 +415,7 @@ void pci_bios_init(void)
     uint8_t elcr[2];
 
     pci_bios_io_addr = 0xc000;
-    pci_bios_mem_addr = 0xf0000000;
+    pci_bios_mem_addr = HVM_BELOW_4G_MMIO_START;
 
     /* activate IRQ mappings */
     elcr[0] = 0x00;
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/acpi-poweroff-support
--- a/tools/ioemu/patches/acpi-poweroff-support Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/acpi-poweroff-support Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/hw/piix4acpi.c
 Index: ioemu/hw/piix4acpi.c
 ===================================================================
---- ioemu.orig/hw/piix4acpi.c  2006-08-06 02:30:29.288761563 +0100
-+++ ioemu/hw/piix4acpi.c       2006-08-06 02:30:42.131331446 +0100
+--- ioemu.orig/hw/piix4acpi.c  2006-08-17 19:50:05.060576667 +0100
++++ ioemu/hw/piix4acpi.c       2006-08-17 19:50:07.563300039 +0100
 @@ -45,6 +45,10 @@
  #define GBL_RLS           (1 << 2)
  #define SLP_EN            (1 << 13)
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/acpi-support
--- a/tools/ioemu/patches/acpi-support  Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/acpi-support  Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/Makefile.target
 Index: ioemu/Makefile.target
 ===================================================================
---- ioemu.orig/Makefile.target 2006-08-09 19:54:26.055548240 +0100
-+++ ioemu/Makefile.target      2006-08-09 21:29:37.834611244 +0100
+--- ioemu.orig/Makefile.target 2006-08-17 19:49:50.228216099 +0100
++++ ioemu/Makefile.target      2006-08-17 19:50:02.405870095 +0100
 @@ -357,6 +357,7 @@
  VL_OBJS+= fdc.o mc146818rtc.o serial.o pc.o
  VL_OBJS+= cirrus_vga.o mixeng.o parallel.o acpi.o piix_pci.o
@@ -12,8 +12,8 @@ Index: ioemu/Makefile.target
  ifeq ($(TARGET_BASE_ARCH), ppc)
 Index: ioemu/hw/pc.c
 ===================================================================
---- ioemu.orig/hw/pc.c 2006-08-09 19:54:26.133539447 +0100
-+++ ioemu/hw/pc.c      2006-08-09 21:30:30.188733212 +0100
+--- ioemu.orig/hw/pc.c 2006-08-17 19:49:59.312212039 +0100
++++ ioemu/hw/pc.c      2006-08-17 19:50:02.406869984 +0100
 @@ -874,13 +874,19 @@
  
      cmos_init(ram_size, boot_device, bs_table, timeoffset);
@@ -49,7 +49,7 @@ Index: ioemu/hw/piix4acpi.c
 Index: ioemu/hw/piix4acpi.c
 ===================================================================
 --- /dev/null  1970-01-01 00:00:00.000000000 +0000
-+++ ioemu/hw/piix4acpi.c       2006-08-09 21:32:04.400129788 +0100
++++ ioemu/hw/piix4acpi.c       2006-08-17 19:50:02.407869874 +0100
 @@ -0,0 +1,388 @@
 +/*
 + * PIIX4 ACPI controller emulation
@@ -441,8 +441,8 @@ Index: ioemu/hw/piix4acpi.c
 +}
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-09 19:54:26.135539222 +0100
-+++ ioemu/vl.c 2006-08-09 21:29:38.067585110 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:49:59.315211708 +0100
++++ ioemu/vl.c 2006-08-17 19:50:02.410869542 +0100
 @@ -156,7 +156,7 @@
  #else
  #define MAX_CPUS 1
@@ -488,9 +488,9 @@ Index: ioemu/vl.c
      }
 Index: ioemu/vl.h
 ===================================================================
---- ioemu.orig/vl.h    2006-08-09 19:54:26.136539109 +0100
-+++ ioemu/vl.h 2006-08-09 21:31:21.772931536 +0100
-@@ -167,6 +167,7 @@
+--- ioemu.orig/vl.h    2006-08-17 19:49:59.316211597 +0100
++++ ioemu/vl.h 2006-08-17 19:50:02.411869432 +0100
+@@ -168,6 +168,7 @@
  extern int kqemu_allowed;
  extern int win2k_install_hack;
  extern int usb_enabled;
@@ -498,7 +498,7 @@ Index: ioemu/vl.h
  extern int smp_cpus;
  
  /* XXX: make it dynamic */
-@@ -922,6 +923,9 @@
+@@ -923,6 +924,9 @@
  void piix4_pm_init(PCIBus *bus, int devfn);
  void acpi_bios_init(void);
  
@@ -510,8 +510,8 @@ Index: ioemu/vl.h
  extern QEMUMachine isapc_machine;
 Index: ioemu/hw/piix_pci.c
 ===================================================================
---- ioemu.orig/hw/piix_pci.c   2006-08-09 19:54:19.636318228 +0100
-+++ ioemu/hw/piix_pci.c        2006-08-09 19:54:26.152537305 +0100
+--- ioemu.orig/hw/piix_pci.c   2006-08-17 19:38:05.806252180 +0100
++++ ioemu/hw/piix_pci.c        2006-08-17 19:50:02.411869432 +0100
 @@ -241,7 +241,7 @@
  static uint32_t pci_bios_io_addr;
  static uint32_t pci_bios_mem_addr;
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/acpi-timer-support
--- a/tools/ioemu/patches/acpi-timer-support    Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/acpi-timer-support    Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/hw/piix4acpi.c
 Index: ioemu/hw/piix4acpi.c
 ===================================================================
---- ioemu.orig/hw/piix4acpi.c  2006-08-09 20:00:56.118008198 +0100
-+++ ioemu/hw/piix4acpi.c       2006-08-09 20:04:54.375299065 +0100
+--- ioemu.orig/hw/piix4acpi.c  2006-08-17 19:50:02.407869874 +0100
++++ ioemu/hw/piix4acpi.c       2006-08-17 19:50:05.060576667 +0100
 @@ -24,31 +24,30 @@
   */
  
@@ -184,7 +184,7 @@ Index: ioemu/hw/piix4acpi.c
  }
 -                                                                              
                        
  
- /* PIIX4 acpi pci configuration space, func 3 */
+ /* PIIX4 acpi pci configuration space, func 2 */
  void pci_piix4_acpi_init(PCIBus *bus, int devfn)
 @@ -384,5 +383,5 @@
      pci_register_io_region((PCIDevice *)d, 4, 0x10,
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/domain-destroy
--- a/tools/ioemu/patches/domain-destroy        Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/domain-destroy        Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/monitor.c
 Index: ioemu/monitor.c
 ===================================================================
---- ioemu.orig/monitor.c       2006-08-08 11:27:48.555190337 +0100
-+++ ioemu/monitor.c    2006-08-08 11:27:53.984584612 +0100
+--- ioemu.orig/monitor.c       2006-08-17 19:37:36.489509621 +0100
++++ ioemu/monitor.c    2006-08-17 19:49:44.491850141 +0100
 @@ -308,6 +308,7 @@
  
  static void do_quit(void)
@@ -12,8 +12,8 @@ Index: ioemu/monitor.c
  
 Index: ioemu/target-i386-dm/helper2.c
 ===================================================================
---- ioemu.orig/target-i386-dm/helper2.c        2006-08-08 11:27:53.063687351 +0100
-+++ ioemu/target-i386-dm/helper2.c     2006-08-08 11:27:54.011581601 +0100
+--- ioemu.orig/target-i386-dm/helper2.c        2006-08-17 19:49:40.116333768 +0100
++++ ioemu/target-i386-dm/helper2.c     2006-08-17 19:49:44.491850141 +0100
 @@ -488,5 +488,25 @@
              xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]);
          }
@@ -42,9 +42,9 @@ Index: ioemu/target-i386-dm/helper2.c
 +}
 Index: ioemu/vl.h
 ===================================================================
---- ioemu.orig/vl.h    2006-08-08 11:27:53.067686905 +0100
-+++ ioemu/vl.h 2006-08-08 11:27:54.061576023 +0100
-@@ -1189,4 +1189,7 @@
+--- ioemu.orig/vl.h    2006-08-17 19:49:40.120333326 +0100
++++ ioemu/vl.h 2006-08-17 19:49:44.492850031 +0100
+@@ -1190,4 +1190,7 @@
  void kqemu_record_dump(void);
  
  extern char domain_name[];
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/domain-reset
--- a/tools/ioemu/patches/domain-reset  Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/domain-reset  Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/target-i386-dm/helper2.c
 Index: ioemu/target-i386-dm/helper2.c
 ===================================================================
---- ioemu.orig/target-i386-dm/helper2.c        2006-08-08 11:27:45.566523765 +0100
-+++ ioemu/target-i386-dm/helper2.c     2006-08-08 11:27:53.063687351 +0100
+--- ioemu.orig/target-i386-dm/helper2.c        2006-08-17 19:37:36.530505066 +0100
++++ ioemu/target-i386-dm/helper2.c     2006-08-17 19:49:40.116333768 +0100
 @@ -127,6 +127,25 @@
  /* called from main_cpu_reset */
  void cpu_reset(CPUX86State *env)
@@ -41,8 +41,8 @@ Index: ioemu/target-i386-dm/helper2.c
          /* Wait up to 10 msec. */
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-08 11:27:52.994695048 +0100
-+++ ioemu/vl.c 2006-08-08 11:27:53.066687017 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:49:39.442408257 +0100
++++ ioemu/vl.c 2006-08-17 19:49:40.119333436 +0100
 @@ -4948,7 +4948,7 @@
  } QEMUResetEntry;
  
@@ -54,9 +54,9 @@ Index: ioemu/vl.c
  
 Index: ioemu/vl.h
 ===================================================================
---- ioemu.orig/vl.h    2006-08-08 11:27:48.757167803 +0100
-+++ ioemu/vl.h 2006-08-08 11:27:53.067686905 +0100
-@@ -130,6 +130,7 @@
+--- ioemu.orig/vl.h    2006-08-17 19:47:32.680418959 +0100
++++ ioemu/vl.h 2006-08-17 19:49:40.120333326 +0100
+@@ -131,6 +131,7 @@
  
  void qemu_register_reset(QEMUResetHandler *func, void *opaque);
  void qemu_system_reset_request(void);
@@ -64,7 +64,7 @@ Index: ioemu/vl.h
  void qemu_system_shutdown_request(void);
  void qemu_system_powerdown_request(void);
  #if !defined(TARGET_SPARC)
-@@ -139,6 +140,8 @@
+@@ -140,6 +141,8 @@
  void qemu_system_powerdown(void);
  #endif
  
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/domain-timeoffset
--- a/tools/ioemu/patches/domain-timeoffset     Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/domain-timeoffset     Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/hw/mc146818rtc.c
 Index: ioemu/hw/mc146818rtc.c
 ===================================================================
---- ioemu.orig/hw/mc146818rtc.c        2006-08-09 21:32:18.709516404 +0100
-+++ ioemu/hw/mc146818rtc.c     2006-08-09 21:32:24.723838065 +0100
+--- ioemu.orig/hw/mc146818rtc.c        2006-08-17 19:58:03.222720593 +0100
++++ ioemu/hw/mc146818rtc.c     2006-08-17 19:58:08.528134087 +0100
 @@ -178,10 +178,27 @@
      }
  }
@@ -46,8 +46,8 @@ Index: ioemu/hw/mc146818rtc.c
  static void rtc_copy_date(RTCState *s)
 Index: ioemu/hw/pc.c
 ===================================================================
---- ioemu.orig/hw/pc.c 2006-08-09 21:32:24.449868968 +0100
-+++ ioemu/hw/pc.c      2006-08-09 21:32:24.724837952 +0100
+--- ioemu.orig/hw/pc.c 2006-08-17 19:58:08.252164595 +0100
++++ ioemu/hw/pc.c      2006-08-17 19:58:08.529133976 +0100
 @@ -159,7 +159,7 @@
  }
  
@@ -117,8 +117,8 @@ Index: ioemu/hw/pc.c
  QEMUMachine pc_machine = {
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-09 21:32:24.591852952 +0100
-+++ ioemu/vl.c 2006-08-09 21:32:24.727837614 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:58:08.395148788 +0100
++++ ioemu/vl.c 2006-08-17 19:58:08.532133645 +0100
 @@ -163,6 +163,8 @@
  
  int xc_handle;
@@ -162,7 +162,7 @@ Index: ioemu/vl.c
              }
          }
      }
-@@ -6489,7 +6497,8 @@
+@@ -6507,7 +6515,8 @@
  
      machine->init(ram_size, vga_ram_size, boot_device,
                    ds, fd_filename, snapshot,
@@ -174,9 +174,9 @@ Index: ioemu/vl.c
      if (usb_enabled) {
 Index: ioemu/vl.h
 ===================================================================
---- ioemu.orig/vl.h    2006-08-09 21:32:24.454868404 +0100
-+++ ioemu/vl.h 2006-08-09 21:32:24.728837501 +0100
-@@ -575,7 +575,7 @@
+--- ioemu.orig/vl.h    2006-08-17 19:58:08.257164042 +0100
++++ ioemu/vl.h 2006-08-17 19:58:08.532133645 +0100
+@@ -576,7 +576,7 @@
                                   int boot_device,
               DisplayState *ds, const char **fd_filename, int snapshot,
               const char *kernel_filename, const char *kernel_cmdline,
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/hypervisor-pit
--- a/tools/ioemu/patches/hypervisor-pit        Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/hypervisor-pit        Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/Makefile.target
 Index: ioemu/Makefile.target
 ===================================================================
---- ioemu.orig/Makefile.target 2006-08-06 02:22:26.380544784 +0100
-+++ ioemu/Makefile.target      2006-08-06 02:23:23.059226607 +0100
+--- ioemu.orig/Makefile.target 2006-08-17 19:49:33.813030472 +0100
++++ ioemu/Makefile.target      2006-08-17 19:49:50.228216099 +0100
 @@ -354,7 +354,7 @@
  ifeq ($(TARGET_BASE_ARCH), i386)
  # Hardware support
@@ -13,8 +13,8 @@ Index: ioemu/Makefile.target
  DEFINES += -DHAS_AUDIO
 Index: ioemu/hw/pc.c
 ===================================================================
---- ioemu.orig/hw/pc.c 2006-08-06 02:22:35.588518336 +0100
-+++ ioemu/hw/pc.c      2006-08-06 02:23:07.875919141 +0100
+--- ioemu.orig/hw/pc.c 2006-08-17 19:49:35.507843144 +0100
++++ ioemu/hw/pc.c      2006-08-17 19:49:50.229215988 +0100
 @@ -38,7 +38,9 @@
  
  static fdctrl_t *floppy_controller;
@@ -38,8 +38,8 @@ Index: ioemu/hw/pc.c
          pic_set_alt_irq_func(isa_pic, ioapic_set_irq, ioapic);
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-06 02:23:05.216215627 +0100
-+++ ioemu/vl.c 2006-08-06 02:23:07.878918807 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:49:48.566399780 +0100
++++ ioemu/vl.c 2006-08-17 19:49:50.231215767 +0100
 @@ -5570,6 +5570,7 @@
  
  #ifdef HAS_AUDIO
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/ide-hd-multithread
--- a/tools/ioemu/patches/ide-hd-multithread    Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/ide-hd-multithread    Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/hw/ide.c
 Index: ioemu/hw/ide.c
 ===================================================================
---- ioemu.orig/hw/ide.c        2006-08-06 02:03:50.520919718 +0100
-+++ ioemu/hw/ide.c     2006-08-06 02:23:41.153209614 +0100
+--- ioemu.orig/hw/ide.c        2006-08-17 19:37:36.267534285 +0100
++++ ioemu/hw/ide.c     2006-08-17 19:49:57.830375828 +0100
 @@ -22,6 +22,7 @@
   * THE SOFTWARE.
   */
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/ioemu-ia64
--- a/tools/ioemu/patches/ioemu-ia64    Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/ioemu-ia64    Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/hw/iommu.c
 Index: ioemu/hw/iommu.c
 ===================================================================
---- ioemu.orig/hw/iommu.c      2006-08-15 13:51:31.425498388 +0100
-+++ ioemu/hw/iommu.c   2006-08-15 13:51:35.834011166 +0100
+--- ioemu.orig/hw/iommu.c      2006-08-17 19:37:36.791476068 +0100
++++ ioemu/hw/iommu.c   2006-08-17 19:48:27.357375720 +0100
 @@ -82,7 +82,11 @@
  #define IOPTE_VALID         0x00000002 /* IOPTE is valid */
  #define IOPTE_WAZ           0x00000001 /* Write as zeros */
@@ -16,8 +16,8 @@ Index: ioemu/hw/iommu.c
  
 Index: ioemu/cpu-all.h
 ===================================================================
---- ioemu.orig/cpu-all.h       2006-08-15 13:51:35.772018017 +0100
-+++ ioemu/cpu-all.h    2006-08-15 13:51:35.835011055 +0100
+--- ioemu.orig/cpu-all.h       2006-08-17 19:37:36.791476068 +0100
++++ ioemu/cpu-all.h    2006-08-17 19:48:27.358375609 +0100
 @@ -835,6 +835,31 @@
                  :"=m" (*(volatile long *)addr)
                  :"dIr" (nr));
@@ -52,21 +52,21 @@ Index: ioemu/cpu-all.h
  /* memory API */
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-15 13:51:35.824012271 +0100
-+++ ioemu/vl.c 2006-08-15 13:51:46.770802425 +0100
-@@ -6140,6 +6140,11 @@
-     /* init the memory */
-     phys_ram_size = ram_size + vga_ram_size + bios_size;
+--- ioemu.orig/vl.c    2006-08-17 19:47:08.538087284 +0100
++++ ioemu/vl.c 2006-08-17 19:57:50.666108706 +0100
+@@ -6144,6 +6144,11 @@
+ 
+     xc_handle = xc_interface_open();
  
 +#if defined (__ia64__)
 +    if (ram_size > MMIO_START)
-+      ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. */
++        ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. */
 +#endif
 +
- #ifdef CONFIG_DM
- 
      nr_pages = ram_size/PAGE_SIZE;
-@@ -6151,6 +6156,7 @@
+     tmp_nr_pages = nr_pages;
+ 
+@@ -6161,6 +6166,7 @@
          exit(-1);
      }
  
@@ -74,7 +74,7 @@ Index: ioemu/vl.c
      if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) {
          fprintf(logfile, "xc_get_pfn_list returned error %d\n", errno);
          exit(-1);
-@@ -6173,6 +6179,41 @@
+@@ -6191,6 +6197,41 @@
  
      free(page_array);
  
@@ -100,9 +100,9 @@ Index: ioemu/vl.c
 +    }
 +
 +    if (ram_size > MMIO_START) {      
-+      for (i = 0 ; i < MEM_G >> PAGE_SHIFT; i++)
-+          page_array[MMIO_START >> PAGE_SHIFT + i] =
-+              page_array[IO_PAGE_START >> PAGE_SHIFT + 1];
++        for (i = 0 ; i < MEM_G >> PAGE_SHIFT; i++)
++            page_array[MMIO_START >> PAGE_SHIFT + i] =
++                page_array[IO_PAGE_START >> PAGE_SHIFT + 1];
 +    }
 +
 +    phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
@@ -118,8 +118,8 @@ Index: ioemu/vl.c
      phys_ram_base = qemu_vmalloc(phys_ram_size);
 Index: ioemu/target-i386-dm/exec-dm.c
 ===================================================================
---- ioemu.orig/target-i386-dm/exec-dm.c        2006-08-15 13:51:35.705025421 +0100
-+++ ioemu/target-i386-dm/exec-dm.c     2006-08-15 13:51:51.987225890 +0100
+--- ioemu.orig/target-i386-dm/exec-dm.c        2006-08-17 19:37:36.792475957 +0100
++++ ioemu/target-i386-dm/exec-dm.c     2006-08-17 19:48:27.361375278 +0100
 @@ -341,6 +341,23 @@
      return io_mem_read[io_index >> IO_MEM_SHIFT];
  }
@@ -156,8 +156,8 @@ Index: ioemu/target-i386-dm/exec-dm.c
              if (io_index) {
 Index: ioemu/exec-all.h
 ===================================================================
---- ioemu.orig/exec-all.h      2006-08-15 13:51:35.682027963 +0100
-+++ ioemu/exec-all.h   2006-08-15 13:51:35.839010613 +0100
+--- ioemu.orig/exec-all.h      2006-08-17 19:37:36.791476068 +0100
++++ ioemu/exec-all.h   2006-08-17 19:48:27.362375167 +0100
 @@ -462,12 +462,13 @@
  }
  #endif
@@ -177,8 +177,8 @@ Index: ioemu/exec-all.h
  
 Index: ioemu/target-i386-dm/cpu.h
 ===================================================================
---- ioemu.orig/target-i386-dm/cpu.h    2006-08-15 13:51:35.704025531 +0100
-+++ ioemu/target-i386-dm/cpu.h 2006-08-15 13:51:35.839010613 +0100
+--- ioemu.orig/target-i386-dm/cpu.h    2006-08-17 19:37:36.792475957 +0100
++++ ioemu/target-i386-dm/cpu.h 2006-08-17 19:48:27.362375167 +0100
 @@ -80,7 +80,11 @@
  /* helper2.c */
  int main_loop(void);
@@ -194,7 +194,7 @@ Index: ioemu/ia64_intrinsic.h
 Index: ioemu/ia64_intrinsic.h
 ===================================================================
 --- /dev/null  1970-01-01 00:00:00.000000000 +0000
-+++ ioemu/ia64_intrinsic.h     2006-08-15 13:51:35.840010502 +0100
++++ ioemu/ia64_intrinsic.h     2006-08-17 19:48:27.363375057 +0100
 @@ -0,0 +1,276 @@
 +#ifndef IA64_INTRINSIC_H
 +#define IA64_INTRINSIC_H
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/qemu-allow-disable-sdl
--- a/tools/ioemu/patches/qemu-allow-disable-sdl        Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/qemu-allow-disable-sdl        Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/configure
 Index: ioemu/configure
 ===================================================================
---- ioemu.orig/configure       2006-08-06 02:15:01.771108621 +0100
-+++ ioemu/configure    2006-08-06 02:42:26.213918476 +0100
+--- ioemu.orig/configure       2006-08-17 19:37:35.772589281 +0100
++++ ioemu/configure    2006-08-17 19:50:24.735401975 +0100
 @@ -228,8 +228,6 @@
    ;;
    --enable-cocoa) cocoa="yes" ; coreaudio="yes" ; sdl="no"
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/qemu-fix-memset-args
--- a/tools/ioemu/patches/qemu-fix-memset-args  Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/qemu-fix-memset-args  Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/audio/audio.c
 Index: ioemu/audio/audio.c
 ===================================================================
---- ioemu.orig/audio/audio.c   2006-08-06 02:03:50.013976165 +0100
-+++ ioemu/audio/audio.c        2006-08-06 02:42:28.991609008 +0100
+--- ioemu.orig/audio/audio.c   2006-08-17 19:37:35.755591169 +0100
++++ ioemu/audio/audio.c        2006-08-17 19:50:26.867166346 +0100
 @@ -605,11 +605,11 @@
      }
  
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/qemu-fix-write-to-disk-synchronous
--- a/tools/ioemu/patches/qemu-fix-write-to-disk-synchronous    Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/qemu-fix-write-to-disk-synchronous    Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/block-vmdk.c
 Index: ioemu/block-vmdk.c
 ===================================================================
---- ioemu.orig/block-vmdk.c    2006-08-06 02:03:45.756450226 +0100
-+++ ioemu/block-vmdk.c 2006-08-06 02:42:31.884286728 +0100
+--- ioemu.orig/block-vmdk.c    2006-08-17 19:37:35.737593169 +0100
++++ ioemu/block-vmdk.c 2006-08-17 19:50:28.884943317 +0100
 @@ -96,7 +96,7 @@
      uint32_t magic;
      int l1_size;
@@ -13,8 +13,8 @@ Index: ioemu/block-vmdk.c
          if (fd < 0)
 Index: ioemu/block-qcow.c
 ===================================================================
---- ioemu.orig/block-qcow.c    2006-08-06 02:03:45.754450449 +0100
-+++ ioemu/block-qcow.c 2006-08-06 02:42:31.885286616 +0100
+--- ioemu.orig/block-qcow.c    2006-08-17 19:37:35.737593169 +0100
++++ ioemu/block-qcow.c 2006-08-17 19:50:28.885943206 +0100
 @@ -95,7 +95,7 @@
      int fd, len, i, shift;
      QCowHeader header;
@@ -26,8 +26,8 @@ Index: ioemu/block-qcow.c
          if (fd < 0)
 Index: ioemu/block-bochs.c
 ===================================================================
---- ioemu.orig/block-bochs.c   2006-08-06 01:55:02.225741179 +0100
-+++ ioemu/block-bochs.c        2006-08-06 02:42:31.885286616 +0100
+--- ioemu.orig/block-bochs.c   2006-08-17 19:37:35.737593169 +0100
++++ ioemu/block-bochs.c        2006-08-17 19:50:28.885943206 +0100
 @@ -91,7 +91,7 @@
      int fd, i;
      struct bochs_header bochs;
@@ -39,8 +39,8 @@ Index: ioemu/block-bochs.c
          if (fd < 0)
 Index: ioemu/block.c
 ===================================================================
---- ioemu.orig/block.c 2006-08-06 02:42:18.880735483 +0100
-+++ ioemu/block.c      2006-08-06 02:42:31.886286505 +0100
+--- ioemu.orig/block.c 2006-08-17 19:50:18.872050063 +0100
++++ ioemu/block.c      2006-08-17 19:50:28.885943206 +0100
 @@ -685,7 +685,7 @@
      int rv;
  #endif
@@ -52,8 +52,8 @@ Index: ioemu/block.c
          if (fd < 0)
 Index: ioemu/block-cow.c
 ===================================================================
---- ioemu.orig/block-cow.c     2006-08-06 02:03:45.751450783 +0100
-+++ ioemu/block-cow.c  2006-08-06 02:42:31.886286505 +0100
+--- ioemu.orig/block-cow.c     2006-08-17 19:37:35.738593058 +0100
++++ ioemu/block-cow.c  2006-08-17 19:50:28.886943095 +0100
 @@ -69,7 +69,7 @@
      struct cow_header_v2 cow_header;
      int64_t size;
@@ -65,8 +65,8 @@ Index: ioemu/block-cow.c
          if (fd < 0)
 Index: ioemu/block-cloop.c
 ===================================================================
---- ioemu.orig/block-cloop.c   2006-08-06 01:55:02.226741067 +0100
-+++ ioemu/block-cloop.c        2006-08-06 02:42:31.886286505 +0100
+--- ioemu.orig/block-cloop.c   2006-08-17 19:37:35.737593169 +0100
++++ ioemu/block-cloop.c        2006-08-17 19:50:28.886943095 +0100
 @@ -55,7 +55,7 @@
      BDRVCloopState *s = bs->opaque;
      uint32_t offsets_size,max_compressed_block_size=1,i;
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/serial-non-block
--- a/tools/ioemu/patches/serial-non-block      Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/serial-non-block      Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/vl.c
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-06 02:23:29.827472129 +0100
-+++ ioemu/vl.c 2006-08-06 02:23:36.856688561 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:49:52.162002356 +0100
++++ ioemu/vl.c 2006-08-17 19:49:56.273547905 +0100
 @@ -1175,19 +1175,34 @@
  
  static int unix_write(int fd, const uint8_t *buf, int len1)
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/series
--- a/tools/ioemu/patches/series        Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/series        Sun Aug 20 11:08:45 2006 -0400
@@ -41,3 +41,5 @@ qemu-fix-memset-args
 qemu-fix-memset-args
 qemu-fix-write-to-disk-synchronous
 xen-support-buffered-ioreqs
+qemu-daemonize
+xen-platform-device
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/shadow-vram
--- a/tools/ioemu/patches/shadow-vram   Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/shadow-vram   Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/hw/vga.c
 Index: ioemu/hw/vga.c
 ===================================================================
---- ioemu.orig/hw/vga.c        2006-08-06 02:23:29.824472464 +0100
-+++ ioemu/hw/vga.c     2006-08-06 02:23:33.873021159 +0100
+--- ioemu.orig/hw/vga.c        2006-08-17 19:49:52.159002688 +0100
++++ ioemu/hw/vga.c     2006-08-17 19:49:54.575735565 +0100
 @@ -1359,6 +1359,105 @@
      }
  }
@@ -137,8 +137,8 @@ Index: ioemu/hw/vga.c
      s->vram_size = vga_ram_size;
 Index: ioemu/hw/vga_int.h
 ===================================================================
---- ioemu.orig/hw/vga_int.h    2006-08-06 02:23:29.824472464 +0100
-+++ ioemu/hw/vga_int.h 2006-08-06 02:23:33.874021048 +0100
+--- ioemu.orig/hw/vga_int.h    2006-08-17 19:49:52.159002688 +0100
++++ ioemu/hw/vga_int.h 2006-08-17 19:49:54.575735565 +0100
 @@ -79,6 +79,7 @@
  
  #define VGA_STATE_COMMON                                                \
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/shared-vram
--- a/tools/ioemu/patches/shared-vram   Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/shared-vram   Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/hw/cirrus_vga.c
 Index: ioemu/hw/cirrus_vga.c
 ===================================================================
---- ioemu.orig/hw/cirrus_vga.c 2006-08-06 01:55:02.822674718 +0100
-+++ ioemu/hw/cirrus_vga.c      2006-08-06 02:23:29.822472686 +0100
+--- ioemu.orig/hw/cirrus_vga.c 2006-08-17 19:37:36.372522620 +0100
++++ ioemu/hw/cirrus_vga.c      2006-08-17 19:49:52.157002909 +0100
 @@ -28,6 +28,9 @@
   */
  #include "vl.h"
@@ -176,8 +176,8 @@ Index: ioemu/hw/cirrus_vga.c
  }
 Index: ioemu/hw/pc.c
 ===================================================================
---- ioemu.orig/hw/pc.c 2006-08-06 02:23:07.875919141 +0100
-+++ ioemu/hw/pc.c      2006-08-06 02:23:29.823472575 +0100
+--- ioemu.orig/hw/pc.c 2006-08-17 19:49:50.229215988 +0100
++++ ioemu/hw/pc.c      2006-08-17 19:49:52.158002799 +0100
 @@ -790,14 +790,14 @@
      if (cirrus_vga_enabled) {
          if (pci_enabled) {
@@ -198,8 +198,8 @@ Index: ioemu/hw/pc.c
  
 Index: ioemu/hw/vga.c
 ===================================================================
---- ioemu.orig/hw/vga.c        2006-08-06 02:22:46.606290142 +0100
-+++ ioemu/hw/vga.c     2006-08-06 02:23:29.824472464 +0100
+--- ioemu.orig/hw/vga.c        2006-08-17 19:49:37.764593706 +0100
++++ ioemu/hw/vga.c     2006-08-17 19:49:52.159002688 +0100
 @@ -1858,6 +1858,7 @@
      /* TODO: add vbe support if enabled */
  }
@@ -251,8 +251,8 @@ Index: ioemu/hw/vga.c
  
 Index: ioemu/hw/vga_int.h
 ===================================================================
---- ioemu.orig/hw/vga_int.h    2006-08-06 02:14:09.797902638 +0100
-+++ ioemu/hw/vga_int.h 2006-08-06 02:23:29.824472464 +0100
+--- ioemu.orig/hw/vga_int.h    2006-08-17 19:37:36.372522620 +0100
++++ ioemu/hw/vga_int.h 2006-08-17 19:49:52.159002688 +0100
 @@ -169,5 +169,6 @@
                               unsigned int color0, unsigned int color1,
                               unsigned int color_xor);
@@ -262,8 +262,8 @@ Index: ioemu/hw/vga_int.h
  extern const uint8_t gr_mask[16];
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-06 02:23:07.878918807 +0100
-+++ ioemu/vl.c 2006-08-06 02:23:29.827472129 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:49:50.231215767 +0100
++++ ioemu/vl.c 2006-08-17 19:49:52.162002356 +0100
 @@ -5693,6 +5693,78 @@
  
  #define MAX_NET_CLIENTS 32
@@ -345,9 +345,9 @@ Index: ioemu/vl.c
  #ifdef CONFIG_GDBSTUB
 Index: ioemu/vl.h
 ===================================================================
---- ioemu.orig/vl.h    2006-08-06 02:23:02.271543880 +0100
-+++ ioemu/vl.h 2006-08-06 02:23:29.828472018 +0100
-@@ -144,6 +144,13 @@
+--- ioemu.orig/vl.h    2006-08-17 19:49:44.492850031 +0100
++++ ioemu/vl.h 2006-08-17 19:49:52.163002246 +0100
+@@ -145,6 +145,13 @@
  
  void main_loop_wait(int timeout);
  
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/support-xm-console
--- a/tools/ioemu/patches/support-xm-console    Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/support-xm-console    Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/vl.c
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-06 02:22:59.254880158 +0100
-+++ ioemu/vl.c 2006-08-06 02:23:05.216215627 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:49:40.119333436 +0100
++++ ioemu/vl.c 2006-08-17 19:49:48.566399780 +0100
 @@ -1536,26 +1536,65 @@
      return chr;
  }
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-access-monitor-vt
--- a/tools/ioemu/patches/vnc-access-monitor-vt Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/vnc-access-monitor-vt Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/vnc.c
 Index: ioemu/vnc.c
 ===================================================================
---- ioemu.orig/vnc.c   2006-08-07 17:44:33.084748631 +0100
-+++ ioemu/vnc.c        2006-08-07 17:44:33.224733389 +0100
+--- ioemu.orig/vnc.c   2006-08-17 19:50:14.623519661 +0100
++++ ioemu/vnc.c        2006-08-17 19:50:15.956372339 +0100
 @@ -32,6 +32,10 @@
  #include "vnc_keysym.h"
  #include "keymaps.c"
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-cleanup
--- a/tools/ioemu/patches/vnc-cleanup   Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/vnc-cleanup   Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/vnc.c
 Index: ioemu/vnc.c
 ===================================================================
---- ioemu.orig/vnc.c   2006-08-07 17:42:21.888055419 +0100
-+++ ioemu/vnc.c        2006-08-07 17:42:28.001363557 +0100
+--- ioemu.orig/vnc.c   2006-08-17 19:37:36.091553839 +0100
++++ ioemu/vnc.c        2006-08-17 19:50:10.313996001 +0100
 @@ -143,13 +143,16 @@
  static void vnc_dpy_update(DisplayState *ds, int x, int y, int w, int h)
  {
@@ -65,8 +65,8 @@ Index: ioemu/vnc.c
  static void vnc_timer_init(VncState *vs)
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-07 17:42:27.807385506 +0100
-+++ ioemu/vl.c 2006-08-07 17:42:28.004363230 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:50:02.410869542 +0100
++++ ioemu/vl.c 2006-08-17 19:50:10.316995669 +0100
 @@ -5120,10 +5120,10 @@
          /* XXX: better handling of removal */
          for(ioh = first_io_handler; ioh != NULL; ioh = ioh_next) {
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-display-find-unused
--- a/tools/ioemu/patches/vnc-display-find-unused       Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/vnc-display-find-unused       Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/vnc.c
 Index: ioemu/vnc.c
 ===================================================================
---- ioemu.orig/vnc.c   2006-08-09 14:44:44.721942535 +0100
-+++ ioemu/vnc.c        2006-08-09 14:52:37.262165292 +0100
+--- ioemu.orig/vnc.c   2006-08-17 19:50:15.956372339 +0100
++++ ioemu/vnc.c        2006-08-17 19:50:17.083247783 +0100
 @@ -1183,7 +1183,7 @@
      }
  }
@@ -50,8 +50,8 @@ Index: ioemu/vnc.c
  int vnc_start_viewer(int port)
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-09 14:44:44.721942535 +0100
-+++ ioemu/vl.c 2006-08-09 14:52:06.783905832 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:50:13.152682236 +0100
++++ ioemu/vl.c 2006-08-17 19:50:17.086247452 +0100
 @@ -121,6 +121,7 @@
  static DisplayState display_state;
  int nographic;
@@ -104,7 +104,7 @@ Index: ioemu/vl.c
              }
          }
      }
-@@ -6465,7 +6475,7 @@
+@@ -6483,7 +6493,7 @@
      if (nographic) {
          dumb_display_init(ds);
      } else if (vnc_display != -1) {
@@ -115,9 +115,9 @@ Index: ioemu/vl.c
      } else {
 Index: ioemu/vl.h
 ===================================================================
---- ioemu.orig/vl.h    2006-08-09 14:44:44.721942535 +0100
-+++ ioemu/vl.h 2006-08-09 14:52:06.783905832 +0100
-@@ -784,7 +784,7 @@
+--- ioemu.orig/vl.h    2006-08-17 19:50:13.153682125 +0100
++++ ioemu/vl.h 2006-08-17 19:50:17.087247341 +0100
+@@ -785,7 +785,7 @@
  void cocoa_display_init(DisplayState *ds, int full_screen);
  
  /* vnc.c */
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-fixes
--- a/tools/ioemu/patches/vnc-fixes     Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/vnc-fixes     Sun Aug 20 11:08:45 2006 -0400
@@ -1,8 +1,8 @@ Index: ioemu/vl.c
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-07 17:42:28.004363230 +0100
-+++ ioemu/vl.c 2006-08-07 17:43:16.361100898 +0100
-@@ -6516,8 +6516,10 @@
+--- ioemu.orig/vl.c    2006-08-17 19:50:10.316995669 +0100
++++ ioemu/vl.c 2006-08-17 19:50:12.100798502 +0100
+@@ -6534,8 +6534,10 @@
          }
      }
  
@@ -17,8 +17,8 @@ Index: ioemu/vl.c
      if (use_gdbstub) {
 Index: ioemu/vnc.c
 ===================================================================
---- ioemu.orig/vnc.c   2006-08-07 17:42:28.001363557 +0100
-+++ ioemu/vnc.c        2006-08-07 17:43:33.593225293 +0100
+--- ioemu.orig/vnc.c   2006-08-17 19:50:10.313996001 +0100
++++ ioemu/vnc.c        2006-08-17 19:50:12.101798392 +0100
 @@ -3,6 +3,7 @@
   * 
   * Copyright (C) 2006 Anthony Liguori <anthony@xxxxxxxxxxxxx>
@@ -524,9 +524,9 @@ Index: ioemu/vnc.c
  }
 Index: ioemu/vl.h
 ===================================================================
---- ioemu.orig/vl.h    2006-08-07 17:42:27.807385506 +0100
-+++ ioemu/vl.h 2006-08-07 17:43:16.361100898 +0100
-@@ -318,6 +318,7 @@
+--- ioemu.orig/vl.h    2006-08-17 19:50:02.411869432 +0100
++++ ioemu/vl.h 2006-08-17 19:50:12.102798281 +0100
+@@ -319,6 +319,7 @@
  int is_graphic_console(void);
  CharDriverState *text_console_init(DisplayState *ds);
  void console_select(unsigned int index);
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-start-vncviewer
--- a/tools/ioemu/patches/vnc-start-vncviewer   Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/vnc-start-vncviewer   Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/vnc.c
 Index: ioemu/vnc.c
 ===================================================================
---- ioemu.orig/vnc.c   2006-08-07 17:44:32.852773890 +0100
-+++ ioemu/vnc.c        2006-08-07 17:44:32.915767031 +0100
+--- ioemu.orig/vnc.c   2006-08-17 19:50:12.101798392 +0100
++++ ioemu/vnc.c        2006-08-17 19:50:13.149682567 +0100
 @@ -1175,3 +1175,25 @@
  
      vnc_dpy_resize(vs->ds, 640, 400);
@@ -30,8 +30,8 @@ Index: ioemu/vnc.c
 +}
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-07 17:44:32.851773999 +0100
-+++ ioemu/vl.c 2006-08-07 17:44:32.918766704 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:50:12.100798502 +0100
++++ ioemu/vl.c 2006-08-17 19:50:13.152682236 +0100
 @@ -120,6 +120,7 @@
  int bios_size;
  static DisplayState display_state;
@@ -82,7 +82,7 @@ Index: ioemu/vl.c
              }
          }
      }
-@@ -6458,6 +6466,8 @@
+@@ -6476,6 +6484,8 @@
          dumb_display_init(ds);
      } else if (vnc_display != -1) {
        vnc_display_init(ds, vnc_display);
@@ -93,9 +93,9 @@ Index: ioemu/vl.c
          sdl_display_init(ds, full_screen);
 Index: ioemu/vl.h
 ===================================================================
---- ioemu.orig/vl.h    2006-08-07 17:44:32.853773781 +0100
-+++ ioemu/vl.h 2006-08-07 17:44:32.919766595 +0100
-@@ -785,6 +785,7 @@
+--- ioemu.orig/vl.h    2006-08-17 19:50:12.102798281 +0100
++++ ioemu/vl.h 2006-08-17 19:50:13.153682125 +0100
+@@ -786,6 +786,7 @@
  
  /* vnc.c */
  void vnc_display_init(DisplayState *ds, int display);
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/vnc-title-domain-name
--- a/tools/ioemu/patches/vnc-title-domain-name Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/vnc-title-domain-name Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/vnc.c
 Index: ioemu/vnc.c
 ===================================================================
---- ioemu.orig/vnc.c   2006-08-07 17:44:32.915767031 +0100
-+++ ioemu/vnc.c        2006-08-07 17:44:33.084748631 +0100
+--- ioemu.orig/vnc.c   2006-08-17 19:50:13.149682567 +0100
++++ ioemu/vnc.c        2006-08-17 19:50:14.623519661 +0100
 @@ -1014,6 +1014,7 @@
  
  static int protocol_client_init(VncState *vs, char *data, size_t len)
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/xen-mm
--- a/tools/ioemu/patches/xen-mm        Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/xen-mm        Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/hw/pc.c
 Index: ioemu/hw/pc.c
 ===================================================================
---- ioemu.orig/hw/pc.c 2006-08-06 02:15:39.708879311 +0100
-+++ ioemu/hw/pc.c      2006-08-06 02:18:18.875135656 +0100
+--- ioemu.orig/hw/pc.c 2006-08-17 19:36:00.588166019 +0100
++++ ioemu/hw/pc.c      2006-08-17 19:37:36.704485734 +0100
 @@ -646,7 +646,9 @@
      }
  
@@ -25,8 +25,8 @@ Index: ioemu/hw/pc.c
      isa_bios_size = bios_size;
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-06 02:18:12.550840673 +0100
-+++ ioemu/vl.c 2006-08-06 02:18:45.608155528 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:36:00.667157242 +0100
++++ ioemu/vl.c 2006-08-17 19:47:08.538087284 +0100
 @@ -158,6 +158,8 @@
  int acpi_enabled = 1;
  int fd_bootchk = 1;
@@ -40,7 +40,7 @@ Index: ioemu/vl.c
      QEMUMachine *machine;
      char usb_devices[MAX_USB_CMDLINE][128];
      int usb_devices_index;
-+    unsigned long nr_pages;
++    unsigned long nr_pages, tmp_nr_pages, shared_page_nr;
 +    xen_pfn_t *page_array;
 +    extern void *shared_page;
  
@@ -60,16 +60,26 @@ Index: ioemu/vl.c
                  break;
              case QEMU_OPTION_l:
                  {
-@@ -6133,12 +6140,49 @@
+@@ -6133,12 +6140,67 @@
      /* init the memory */
      phys_ram_size = ram_size + vga_ram_size + bios_size;
  
 +#ifdef CONFIG_DM
 +
-+    nr_pages = ram_size/PAGE_SIZE;
 +    xc_handle = xc_interface_open();
 +
-+    page_array = (xen_pfn_t *)malloc(nr_pages * sizeof(xen_pfn_t));
++    nr_pages = ram_size/PAGE_SIZE;
++    tmp_nr_pages = nr_pages;
++
++#if defined(__i386__) || defined(__x86_64__)
++    if (ram_size > HVM_BELOW_4G_RAM_END) {
++        tmp_nr_pages += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
++        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
++    } else
++        shared_page_nr = nr_pages - 1;
++#endif
++
++    page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t));
 +    if (page_array == NULL) {
 +        fprintf(logfile, "malloc returned error %d\n", errno);
 +        exit(-1);
@@ -80,20 +90,28 @@ Index: ioemu/vl.c
 +        exit(-1);
 +    }
 +
++    if (ram_size > HVM_BELOW_4G_RAM_END)
++        for (i = 0; i < nr_pages - (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT); i++)
++            page_array[tmp_nr_pages - 1 - i] = page_array[nr_pages - 1 - i];
++
 +    phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
 +                                         PROT_READ|PROT_WRITE, page_array,
-+                                         nr_pages - 1);
-+    if (phys_ram_base == 0) {
-+        fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno);
++                                         tmp_nr_pages);
++    if (phys_ram_base == NULL) {
++        fprintf(logfile, "batch map guest memory returned error %d\n", errno);
 +        exit(-1);
 +    }
 +
 +    shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
 +                                       PROT_READ|PROT_WRITE,
-+                                       page_array[nr_pages - 1]);
++                                       page_array[shared_page_nr]);
++    if (shared_page == NULL) {
++        fprintf(logfile, "map shared IO page returned error %d\n", errno);
++        exit(-1);
++    }
 +
-+    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", nr_pages - 1,
-+            (uint64_t)(page_array[nr_pages - 1]));
++    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n",
++            shared_page_nr, (uint64_t)(page_array[shared_page_nr]));
 +
 +    free(page_array);
 +
@@ -110,3 +128,28 @@ Index: ioemu/vl.c
      /* we always create the cdrom drive, even if no disk is there */
      bdrv_init();
      if (cdrom_index >= 0) {
+Index: ioemu/hw/piix_pci.c
+===================================================================
+--- ioemu.orig/hw/piix_pci.c   2006-08-17 19:37:36.189542951 +0100
++++ ioemu/hw/piix_pci.c        2006-08-17 19:38:05.806252180 +0100
+@@ -399,7 +399,7 @@
+     uint8_t elcr[2];
+ 
+     pci_bios_io_addr = 0xc000;
+-    pci_bios_mem_addr = 0xf0000000;
++    pci_bios_mem_addr = HVM_BELOW_4G_MMIO_START;
+ 
+     /* activate IRQ mappings */
+     elcr[0] = 0x00;
+Index: ioemu/vl.h
+===================================================================
+--- ioemu.orig/vl.h    2006-08-17 19:37:36.529505177 +0100
++++ ioemu/vl.h 2006-08-17 19:47:32.680418959 +0100
+@@ -39,6 +39,7 @@
+ #include <sys/stat.h>
+ #include "xenctrl.h"
+ #include "xs.h"
++#include <xen/hvm/e820.h>
+ 
+ #ifndef O_LARGEFILE
+ #define O_LARGEFILE 0
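
The xen-mm changes above switch qemu-dm from assuming the shared ioreq page is the guest's last RAM page to a layout where RAM above HVM_BELOW_4G_RAM_END is relocated past 4 GB to make room for the PCI MMIO hole. As an illustrative worked example (assuming the usual xen/hvm/e820.h definitions, HVM_BELOW_4G_RAM_END == 0xf0000000 with a 256 MB MMIO hole below 4 GB), a guest configured with 4 GB of RAM ends up with:

    /* guest pfn 0x00000 - 0xeffff : first 3840 MB of RAM              */
    /* guest pfn 0xf0000 - 0xfffff : MMIO hole, no RAM mapped here     */
    /* guest pfn 0x100000 onwards  : remaining 256 MB, relocated high  */
    /*
     * shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1 = 0xeffff,
     * i.e. the ioreq page is the last RAM page below the hole rather
     * than page_array[nr_pages - 1]; for guests smaller than the
     * boundary it stays at nr_pages - 1 as before.
     */
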
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/xen-support-buffered-ioreqs
--- a/tools/ioemu/patches/xen-support-buffered-ioreqs   Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/xen-support-buffered-ioreqs   Sun Aug 20 11:08:45 2006 -0400
@@ -1,38 +1,38 @@ Index: ioemu/vl.c
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-09 15:04:25.583508863 +0100
-+++ ioemu/vl.c 2006-08-09 15:04:26.034465993 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:50:22.277673633 +0100
++++ ioemu/vl.c 2006-08-17 19:55:21.878556486 +0100
 @@ -5838,6 +5838,7 @@
-     unsigned long nr_pages;
+     unsigned long nr_pages, tmp_nr_pages, shared_page_nr;
      xen_pfn_t *page_array;
      extern void *shared_page;
 +    extern void *buffered_io_page;
  
      char qemu_dm_logfilename[64];
  
-@@ -6388,12 +6389,17 @@
- 
-     phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
-                                          PROT_READ|PROT_WRITE, page_array,
--                                         nr_pages - 1);
-+                                         nr_pages - 3);
-     if (phys_ram_base == 0) {
-         fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno);
-         exit(-1);
-     }
+@@ -6419,6 +6420,18 @@
+     fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n",
+             shared_page_nr, (uint64_t)(page_array[shared_page_nr]));
  
 +    /* not yet add for IA64 */
 +    buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-+                                       PROT_READ|PROT_WRITE,
-+                                       page_array[nr_pages - 3]);
++                                            PROT_READ|PROT_WRITE,
++                                            page_array[shared_page_nr - 2]);
++    if (buffered_io_page == NULL) {
++        fprintf(logfile, "map buffered IO page returned error %d\n", errno);
++        exit(-1);
++    }
 +
-     shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                        PROT_READ|PROT_WRITE,
-                                        page_array[nr_pages - 1]);
++    fprintf(logfile, "buffered io page at pfn:%lx, mfn: %"PRIx64"\n",
++            shared_page_nr - 2, (uint64_t)(page_array[shared_page_nr - 2]));
++
+     free(page_array);
+ 
+ #elif defined(__ia64__)
 Index: ioemu/target-i386-dm/helper2.c
 ===================================================================
---- ioemu.orig/target-i386-dm/helper2.c        2006-08-09 15:04:24.105649313 +0100
-+++ ioemu/target-i386-dm/helper2.c     2006-08-09 15:04:26.040465422 +0100
+--- ioemu.orig/target-i386-dm/helper2.c        2006-08-17 19:49:44.491850141 +0100
++++ ioemu/target-i386-dm/helper2.c     2006-08-17 19:50:41.490549986 +0100
 @@ -76,6 +76,10 @@
  
  shared_iopage_t *shared_page = NULL;
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/xenstore-block-device-config
--- a/tools/ioemu/patches/xenstore-block-device-config  Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/xenstore-block-device-config  Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/Makefile.target
 Index: ioemu/Makefile.target
 ===================================================================
---- ioemu.orig/Makefile.target 2006-08-09 21:32:24.915816410 +0100
-+++ ioemu/Makefile.target      2006-08-09 21:32:25.500750429 +0100
+--- ioemu.orig/Makefile.target 2006-08-17 19:50:02.405870095 +0100
++++ ioemu/Makefile.target      2006-08-17 19:50:18.866050726 +0100
 @@ -358,6 +358,7 @@
  VL_OBJS+= cirrus_vga.o mixeng.o parallel.o acpi.o piix_pci.o
  VL_OBJS+= usb-uhci.o
@@ -13,7 +13,7 @@ Index: ioemu/xenstore.c
 Index: ioemu/xenstore.c
 ===================================================================
 --- /dev/null  1970-01-01 00:00:00.000000000 +0000
-+++ ioemu/xenstore.c   2006-08-09 21:32:25.501750317 +0100
++++ ioemu/xenstore.c   2006-08-17 19:50:18.867050616 +0100
 @@ -0,0 +1,187 @@
 +/*
 + * This file is subject to the terms and conditions of the GNU General
@@ -204,8 +204,8 @@ Index: ioemu/xenstore.c
 +}
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-09 21:32:25.438757422 +0100
-+++ ioemu/vl.c 2006-08-09 21:32:25.504749978 +0100
+--- ioemu.orig/vl.c    2006-08-17 19:50:17.086247452 +0100
++++ ioemu/vl.c 2006-08-17 19:50:18.870050284 +0100
 @@ -5243,9 +5243,11 @@
             "Standard options:\n"
             "-M machine      select emulated machine (-M ? for list)\n"
@@ -359,7 +359,7 @@ Index: ioemu/vl.c
  
      setvbuf(stdout, NULL, _IOLBF, 0);
      
-@@ -6417,6 +6448,7 @@
+@@ -6435,6 +6466,7 @@
  
  #endif /* !CONFIG_DM */
  
@@ -367,7 +367,7 @@ Index: ioemu/vl.c
      /* we always create the cdrom drive, even if no disk is there */
      bdrv_init();
      if (cdrom_index >= 0) {
-@@ -6443,6 +6475,7 @@
+@@ -6461,6 +6493,7 @@
              }
          }
      }
@@ -375,7 +375,7 @@ Index: ioemu/vl.c
  
      /* we always create at least one floppy disk */
      fd_table[0] = bdrv_new("fda");
-@@ -6521,6 +6554,8 @@
+@@ -6539,6 +6572,8 @@
          }
      }
  
@@ -386,8 +386,8 @@ Index: ioemu/vl.c
                    kernel_filename, kernel_cmdline, initrd_filename,
 Index: ioemu/monitor.c
 ===================================================================
---- ioemu.orig/monitor.c       2006-08-09 21:32:24.238892765 +0100
-+++ ioemu/monitor.c    2006-08-09 21:32:25.505749865 +0100
+--- ioemu.orig/monitor.c       2006-08-17 19:49:44.491850141 +0100
++++ ioemu/monitor.c    2006-08-17 19:50:18.871050174 +0100
 @@ -24,6 +24,7 @@
  #include "vl.h"
  #include "disas.h"
@@ -416,8 +416,8 @@ Index: ioemu/monitor.c
      int i;
 Index: ioemu/block.c
 ===================================================================
---- ioemu.orig/block.c 2006-08-09 21:32:18.339558126 +0100
-+++ ioemu/block.c      2006-08-09 21:32:25.506749753 +0100
+--- ioemu.orig/block.c 2006-08-17 19:37:35.865578948 +0100
++++ ioemu/block.c      2006-08-17 19:50:18.872050063 +0100
 @@ -758,6 +758,7 @@
  static void raw_close(BlockDriverState *bs)
  {
@@ -428,9 +428,9 @@ Index: ioemu/block.c
  
 Index: ioemu/vl.h
 ===================================================================
---- ioemu.orig/vl.h    2006-08-09 21:32:25.439757309 +0100
-+++ ioemu/vl.h 2006-08-09 21:32:25.506749753 +0100
-@@ -1187,6 +1187,8 @@
+--- ioemu.orig/vl.h    2006-08-17 19:50:17.087247341 +0100
++++ ioemu/vl.h 2006-08-17 19:50:18.872050063 +0100
+@@ -1188,6 +1188,8 @@
  void term_print_help(void);
  void monitor_readline(const char *prompt, int is_password,
                        char *buf, int buf_size);
@@ -439,7 +439,7 @@ Index: ioemu/vl.h
  
  /* readline.c */
  typedef void ReadLineFunc(void *opaque, const char *str);
-@@ -1199,6 +1201,13 @@
+@@ -1200,6 +1202,13 @@
  void readline_start(const char *prompt, int is_password,
                      ReadLineFunc *readline_func, void *opaque);
  
@@ -455,8 +455,8 @@ Index: ioemu/vl.h
  extern char domain_name[];
 Index: ioemu/hw/ide.c
 ===================================================================
---- ioemu.orig/hw/ide.c        2006-08-09 21:32:24.658845396 +0100
-+++ ioemu/hw/ide.c     2006-08-09 21:32:25.508749527 +0100
+--- ioemu.orig/hw/ide.c        2006-08-17 19:49:57.830375828 +0100
++++ ioemu/hw/ide.c     2006-08-17 19:50:18.874049842 +0100
 @@ -1158,6 +1158,7 @@
          } else {
              ide_atapi_cmd_error(s, SENSE_NOT_READY, 
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/patches/xenstore-write-vnc-port
--- a/tools/ioemu/patches/xenstore-write-vnc-port       Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/patches/xenstore-write-vnc-port       Sun Aug 20 11:08:45 2006 -0400
@@ -1,7 +1,7 @@ Index: ioemu/xenstore.c
 Index: ioemu/xenstore.c
 ===================================================================
---- ioemu.orig/xenstore.c      2006-08-09 21:32:25.501750317 +0100
-+++ ioemu/xenstore.c   2006-08-09 21:32:25.706727195 +0100
+--- ioemu.orig/xenstore.c      2006-08-17 19:50:18.867050616 +0100
++++ ioemu/xenstore.c   2006-08-17 19:50:22.274673964 +0100
 @@ -185,3 +185,31 @@
      free(image);
      free(vec);
@@ -36,9 +36,9 @@ Index: ioemu/xenstore.c
 +}
 Index: ioemu/vl.c
 ===================================================================
---- ioemu.orig/vl.c    2006-08-09 21:32:25.504749978 +0100
-+++ ioemu/vl.c 2006-08-09 21:32:25.709726857 +0100
-@@ -6511,6 +6511,7 @@
+--- ioemu.orig/vl.c    2006-08-17 19:50:18.870050284 +0100
++++ ioemu/vl.c 2006-08-17 19:50:22.277673633 +0100
+@@ -6529,6 +6529,7 @@
        vnc_display = vnc_display_init(ds, vnc_display, vncunused);
        if (vncviewer)
            vnc_start_viewer(vnc_display);
@@ -48,9 +48,9 @@ Index: ioemu/vl.c
          sdl_display_init(ds, full_screen);
 Index: ioemu/vl.h
 ===================================================================
---- ioemu.orig/vl.h    2006-08-09 21:32:25.506749753 +0100
-+++ ioemu/vl.h 2006-08-09 21:32:25.710726744 +0100
-@@ -1206,6 +1206,7 @@
+--- ioemu.orig/vl.h    2006-08-17 19:50:18.872050063 +0100
++++ ioemu/vl.h 2006-08-17 19:50:22.278673522 +0100
+@@ -1207,6 +1207,7 @@
  int xenstore_fd(void);
  void xenstore_process_event(void *opaque);
  void xenstore_check_new_media_present(int timeout);
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/vl.c  Sun Aug 20 11:08:45 2006 -0400
@@ -5835,7 +5835,7 @@ int main(int argc, char **argv)
     QEMUMachine *machine;
     char usb_devices[MAX_USB_CMDLINE][128];
     int usb_devices_index;
-    unsigned long nr_pages;
+    unsigned long nr_pages, tmp_nr_pages, shared_page_nr;
     xen_pfn_t *page_array;
     extern void *shared_page;
     extern void *buffered_io_page;
@@ -6036,10 +6036,11 @@ int main(int argc, char **argv)
                 }
                 break;
             case QEMU_OPTION_nographic:
-                pstrcpy(monitor_device, sizeof(monitor_device), "stdio");
+                if(!strcmp(monitor_device, "vc"))
+                    pstrcpy(monitor_device, sizeof(monitor_device), "null");
                 if(!strcmp(serial_devices[0], "vc"))
                     pstrcpy(serial_devices[0], sizeof(serial_devices[0]),
-                            "stdio");
+                            "null");
                 nographic = 1;
                 break;
             case QEMU_OPTION_kernel:
@@ -6365,17 +6366,27 @@ int main(int argc, char **argv)
     /* init the memory */
     phys_ram_size = ram_size + vga_ram_size + bios_size;
 
+#ifdef CONFIG_DM
+
+    xc_handle = xc_interface_open();
+
 #if defined (__ia64__)
     if (ram_size > MMIO_START)
-       ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. */
-#endif
-
-#ifdef CONFIG_DM
+        ram_size += 1 * MEM_G; /* skip 3G-4G MMIO, LEGACY_IO_SPACE etc. */
+#endif
 
     nr_pages = ram_size/PAGE_SIZE;
-    xc_handle = xc_interface_open();
-
-    page_array = (xen_pfn_t *)malloc(nr_pages * sizeof(xen_pfn_t));
+    tmp_nr_pages = nr_pages;
+
+#if defined(__i386__) || defined(__x86_64__)
+    if (ram_size > HVM_BELOW_4G_RAM_END) {
+        tmp_nr_pages += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
+    } else
+        shared_page_nr = nr_pages - 1;
+#endif
+
+    page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t));
     if (page_array == NULL) {
         fprintf(logfile, "malloc returned error %d\n", errno);
         exit(-1);
@@ -6387,25 +6398,40 @@ int main(int argc, char **argv)
         exit(-1);
     }
 
+    if (ram_size > HVM_BELOW_4G_RAM_END)
+        for (i = 0; i < nr_pages - (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT); i++)
+            page_array[tmp_nr_pages - 1 - i] = page_array[nr_pages - 1 - i];
+
     phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
                                          PROT_READ|PROT_WRITE, page_array,
-                                         nr_pages - 3);
-    if (phys_ram_base == 0) {
-        fprintf(logfile, "xc_map_foreign_batch returned error %d\n", errno);
+                                         tmp_nr_pages);
+    if (phys_ram_base == NULL) {
+        fprintf(logfile, "batch map guest memory returned error %d\n", errno);
         exit(-1);
     }
+
+    shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE,
+                                       page_array[shared_page_nr]);
+    if (shared_page == NULL) {
+        fprintf(logfile, "map shared IO page returned error %d\n", errno);
+        exit(-1);
+    }
+
+    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n",
+            shared_page_nr, (uint64_t)(page_array[shared_page_nr]));
 
     /* not yet add for IA64 */
     buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE,
-                                       page_array[nr_pages - 3]);
-
-    shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE,
-                                       page_array[nr_pages - 1]);
-
-    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", nr_pages - 1,
-            (uint64_t)(page_array[nr_pages - 1]));
+                                            PROT_READ|PROT_WRITE,
+                                            page_array[shared_page_nr - 2]);
+    if (buffered_io_page == NULL) {
+        fprintf(logfile, "map buffered IO page returned error %d\n", errno);
+        exit(-1);
+    }
+
+    fprintf(logfile, "buffered io page at pfn:%lx, mfn: %"PRIx64"\n",
+            shared_page_nr - 2, (uint64_t)(page_array[shared_page_nr - 2]));
 
     free(page_array);
 
@@ -6431,9 +6457,9 @@ int main(int argc, char **argv)
     }
 
     if (ram_size > MMIO_START) {       
-       for (i = 0 ; i < MEM_G >> PAGE_SHIFT; i++)
-           page_array[MMIO_START >> PAGE_SHIFT + i] =
-               page_array[IO_PAGE_START >> PAGE_SHIFT + 1];
+        for (i = 0 ; i < MEM_G >> PAGE_SHIFT; i++)
+            page_array[MMIO_START >> PAGE_SHIFT + i] =
+                page_array[IO_PAGE_START >> PAGE_SHIFT + 1];
     }
 
     phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
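
The qemu-dm hunks above change where the special pages live: once a guest has more than HVM_BELOW_4G_RAM_END of RAM, the surplus is relocated above 4GB and the shared-IO, xenstore and buffered-IO pages sit just below the PCI MMIO hole instead of in the last three pfns of RAM. A minimal standalone sketch of that pfn arithmetic (the HVM_BELOW_4G_RAM_END and PAGE_SHIFT values below are assumptions for illustration, not taken from this changeset):

    #include <stdio.h>

    /* Illustrative sketch only (not code from the changeset): where the
     * special pages land for a given guest RAM size.  In the patched tree
     * these constants come from <xen/hvm/e820.h>; the values here are
     * assumptions for the example. */
    #define PAGE_SHIFT            12
    #define HVM_BELOW_4G_RAM_END  0xF0000000ULL   /* assumed value */

    int main(void)
    {
        unsigned long long ram_size = 5ULL << 30;   /* e.g. a 5GB HVM guest */
        unsigned long long shared_page_nr;

        if (ram_size > HVM_BELOW_4G_RAM_END)
            shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
        else
            shared_page_nr = (ram_size >> PAGE_SHIFT) - 1;

        printf("shared io page pfn:   %#llx\n", shared_page_nr);
        printf("xenstore page pfn:    %#llx\n", shared_page_nr - 1);
        printf("buffered io page pfn: %#llx\n", shared_page_nr - 2);
        return 0;
    }
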
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/ioemu/vl.h  Sun Aug 20 11:08:45 2006 -0400
@@ -39,6 +39,7 @@
 #include <sys/stat.h>
 #include "xenctrl.h"
 #include "xs.h"
+#include <xen/hvm/e820.h>
 
 #ifndef O_LARGEFILE
 #define O_LARGEFILE 0
@@ -1208,6 +1209,9 @@ void xenstore_check_new_media_present(in
 void xenstore_check_new_media_present(int timeout);
 void xenstore_write_vncport(int vnc_display);
 
+/* xen_platform.c */
+void pci_xen_platform_init(PCIBus *bus);
+
 
 void kqemu_record_dump(void);
 
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libaio/src/Makefile
--- a/tools/libaio/src/Makefile Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/libaio/src/Makefile Sun Aug 20 11:08:45 2006 -0400
@@ -1,3 +1,6 @@ prefix=/usr
+XEN_ROOT = ../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
 prefix=/usr
 includedir=$(prefix)/include
 libdir=$(prefix)/lib
@@ -44,8 +47,8 @@ libaio_sobjs := $(patsubst %.c,%.os,$(li
 
 libaio.a: $(libaio_objs)
        rm -f libaio.a
-       ar r libaio.a $^
-       ranlib libaio.a
+       $(AR) r libaio.a $^
+       $(RANLIB) libaio.a
 
 $(libname): $(libaio_sobjs) libaio.map
        $(CC) $(SO_CFLAGS) -Wl,--version-script=libaio.map -Wl,-soname=$(soname) -o $@ $(libaio_sobjs) $(LINK_FLAGS)
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/libxc/xc_domain.c   Sun Aug 20 11:08:45 2006 -0400
@@ -213,21 +213,28 @@ int xc_shadow_control(int xc_handle,
                       unsigned int sop,
                       unsigned long *dirty_bitmap,
                       unsigned long pages,
-                      xc_shadow_control_stats_t *stats )
+                      unsigned long *mb,
+                      uint32_t mode,
+                      xc_shadow_control_stats_t *stats)
 {
     int rc;
     DECLARE_DOM0_OP;
     op.cmd = DOM0_SHADOW_CONTROL;
     op.u.shadow_control.domain = (domid_t)domid;
     op.u.shadow_control.op     = sop;
+    op.u.shadow_control.pages  = pages;
+    op.u.shadow_control.mb     = mb ? *mb : 0;
+    op.u.shadow_control.mode   = mode;
     set_xen_guest_handle(op.u.shadow_control.dirty_bitmap, dirty_bitmap);
-    op.u.shadow_control.pages  = pages;
 
     rc = do_dom0_op(xc_handle, &op);
 
     if ( stats )
         memcpy(stats, &op.u.shadow_control.stats,
                sizeof(xc_shadow_control_stats_t));
+    
+    if ( mb ) 
+        *mb = op.u.shadow_control.mb;
 
     return (rc == 0) ? op.u.shadow_control.pages : rc;
 }
@@ -391,7 +398,7 @@ int xc_domain_memory_populate_physmap(in
 
     if ( err > 0 )
     {
-        DPRINTF("Failed deallocation for dom %d: %ld pages order %d\n",
+        DPRINTF("Failed allocation for dom %d: %ld pages order %d\n",
                 domid, nr_extents, extent_order);
         errno = EBUSY;
         err = -1;
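
xc_shadow_control() gains an in/out mb argument and a mode word; the callers updated further down (xc_linux_build.c, xc_linux_save.c, xc_shadow.c, the Python bindings) simply pass NULL and 0 when they do not care. A hedged sketch of how a caller might query a domain's shadow allocation through the widened interface (illustrative only, assuming the patched libxc headers):

    #include <xenctrl.h>

    /* Illustrative only: read back a domain's shadow allocation in MB via the
     * extended xc_shadow_control() signature introduced above. */
    unsigned long get_shadow_mb(int xc_handle, uint32_t domid)
    {
        unsigned long mb = 0;

        /* dirty_bitmap, pages and stats are unused by this operation. */
        if ( xc_shadow_control(xc_handle, domid,
                               DOM0_SHADOW2_CONTROL_OP_GET_ALLOCATION,
                               NULL, 0, &mb, 0, NULL) < 0 )
            return 0;  /* treat failure as "unknown" in this sketch */

        return mb;
    }
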
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/libxc/xc_hvm_build.c        Sun Aug 20 11:08:45 2006 -0400
@@ -54,9 +54,19 @@ static void build_e820map(void *e820_pag
 {
     struct e820entry *e820entry =
         (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
+    unsigned long long extra_mem_size = 0;
     unsigned char nr_map = 0;
 
-    /* XXX: Doesn't work for > 4GB yet */
+    /*
+     * physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
+     * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
+     * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
+     */
+    if ( mem_size > HVM_BELOW_4G_RAM_END ) {
+        extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
+        mem_size = HVM_BELOW_4G_RAM_END;
+    }
+
     e820entry[nr_map].addr = 0x0;
     e820entry[nr_map].size = 0x9F000;
     e820entry[nr_map].type = E820_RAM;
@@ -77,53 +87,86 @@ static void build_e820map(void *e820_pag
     e820entry[nr_map].type = E820_RESERVED;
     nr_map++;
 
-#define STATIC_PAGES    3
-    /* 3 static pages:
-     * - ioreq buffer.
-     * - xenstore.
-     * - shared_page.
-     */
+/* ACPI data: 10 pages. */
+#define ACPI_DATA_PAGES     10
+/* ACPI NVS: 3 pages.   */
+#define ACPI_NVS_PAGES      3
+/* buffered io page.    */
+#define BUFFERED_IO_PAGES   1
+/* xenstore page.       */
+#define XENSTORE_PAGES      1
+/* shared io page.      */
+#define SHARED_IO_PAGES     1
+/* totally 16 static pages are reserved in E820 table */
 
     /* Most of the ram goes here */
     e820entry[nr_map].addr = 0x100000;
-    e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES * PAGE_SIZE;
+    e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE *
+                                                (ACPI_DATA_PAGES +
+                                                 ACPI_NVS_PAGES +
+                                                 BUFFERED_IO_PAGES +
+                                                 XENSTORE_PAGES +
+                                                 SHARED_IO_PAGES);
     e820entry[nr_map].type = E820_RAM;
     nr_map++;
 
     /* Statically allocated special pages */
 
+    /* For ACPI data */
+    e820entry[nr_map].addr = mem_size - PAGE_SIZE *
+                                        (ACPI_DATA_PAGES +
+                                         ACPI_NVS_PAGES +
+                                         BUFFERED_IO_PAGES +
+                                         XENSTORE_PAGES +
+                                         SHARED_IO_PAGES);
+    e820entry[nr_map].size = PAGE_SIZE * ACPI_DATA_PAGES;
+    e820entry[nr_map].type = E820_ACPI;
+    nr_map++;
+
+    /* For ACPI NVS */
+    e820entry[nr_map].addr = mem_size - PAGE_SIZE *
+                                        (ACPI_NVS_PAGES +
+                                         BUFFERED_IO_PAGES +
+                                         XENSTORE_PAGES +
+                                         SHARED_IO_PAGES);
+    e820entry[nr_map].size = PAGE_SIZE * ACPI_NVS_PAGES;
+    e820entry[nr_map].type = E820_NVS;
+    nr_map++;
+
     /* For buffered IO requests */
-    e820entry[nr_map].addr = mem_size - 3 * PAGE_SIZE;
-    e820entry[nr_map].size = PAGE_SIZE;
+    e820entry[nr_map].addr = mem_size - PAGE_SIZE *
+                                        (BUFFERED_IO_PAGES +
+                                         XENSTORE_PAGES +
+                                         SHARED_IO_PAGES);
+    e820entry[nr_map].size = PAGE_SIZE * BUFFERED_IO_PAGES;
     e820entry[nr_map].type = E820_BUFFERED_IO;
     nr_map++;
 
     /* For xenstore */
-    e820entry[nr_map].addr = mem_size - 2 * PAGE_SIZE;
-    e820entry[nr_map].size = PAGE_SIZE;
+    e820entry[nr_map].addr = mem_size - PAGE_SIZE *
+                                        (XENSTORE_PAGES +
+                                         SHARED_IO_PAGES);
+    e820entry[nr_map].size = PAGE_SIZE * XENSTORE_PAGES;
     e820entry[nr_map].type = E820_XENSTORE;
     nr_map++;
 
     /* Shared ioreq_t page */
-    e820entry[nr_map].addr = mem_size - PAGE_SIZE;
-    e820entry[nr_map].size = PAGE_SIZE;
+    e820entry[nr_map].addr = mem_size - PAGE_SIZE * SHARED_IO_PAGES;
+    e820entry[nr_map].size = PAGE_SIZE * SHARED_IO_PAGES;
     e820entry[nr_map].type = E820_SHARED_PAGE;
-    nr_map++;
-
-    e820entry[nr_map].addr = mem_size;
-    e820entry[nr_map].size = 0x3 * PAGE_SIZE;
-    e820entry[nr_map].type = E820_NVS;
-    nr_map++;
-
-    e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
-    e820entry[nr_map].size = 0xA * PAGE_SIZE;
-    e820entry[nr_map].type = E820_ACPI;
     nr_map++;
 
     e820entry[nr_map].addr = 0xFEC00000;
     e820entry[nr_map].size = 0x1400000;
     e820entry[nr_map].type = E820_IO;
     nr_map++;
+
+    if ( extra_mem_size ) {
+        e820entry[nr_map].addr = (1ULL << 32);
+        e820entry[nr_map].size = extra_mem_size;
+        e820entry[nr_map].type = E820_RAM;
+        nr_map++;
+    }
 
     *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map;
 }
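
build_e820map() now reserves sixteen pages at the top of below-4GB memory (10 ACPI data, 3 ACPI NVS, one each for the buffered-IO, xenstore and shared-IO pages) and reports any RAM beyond HVM_BELOW_4G_RAM_END as a separate E820_RAM entry starting at 4GB. A small sketch of how the region base addresses fall out of that layout, assuming a 4096-byte PAGE_SIZE:

    #include <stdio.h>

    /* Illustrative sketch (not part of the patch): base addresses of the
     * statically reserved regions laid out by build_e820map() above, counted
     * back from the already-clamped mem_size.  PAGE_SIZE of 4096 is assumed. */
    #define PAGE_SIZE          4096ULL
    #define ACPI_DATA_PAGES    10
    #define ACPI_NVS_PAGES     3
    #define BUFFERED_IO_PAGES  1
    #define XENSTORE_PAGES     1
    #define SHARED_IO_PAGES    1

    int main(void)
    {
        unsigned long long mem_size = 512ULL << 20;  /* e.g. a 512MB guest */
        unsigned long long top = mem_size;

        top -= PAGE_SIZE * SHARED_IO_PAGES;    printf("shared io:   %#llx\n", top);
        top -= PAGE_SIZE * XENSTORE_PAGES;     printf("xenstore:    %#llx\n", top);
        top -= PAGE_SIZE * BUFFERED_IO_PAGES;  printf("buffered io: %#llx\n", top);
        top -= PAGE_SIZE * ACPI_NVS_PAGES;     printf("acpi nvs:    %#llx\n", top);
        top -= PAGE_SIZE * ACPI_DATA_PAGES;    printf("acpi data:   %#llx\n", top);
        return 0;
    }
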
@@ -147,7 +190,7 @@ static void set_hvm_info_checksum(struct
  */
 static int set_hvm_info(int xc_handle, uint32_t dom,
                         xen_pfn_t *pfn_list, unsigned int vcpus,
-                        unsigned int acpi, unsigned int apic)
+                        unsigned int acpi)
 {
     char *va_map;
     struct hvm_info_table *va_hvm;
@@ -170,8 +213,6 @@ static int set_hvm_info(int xc_handle, u
     set_hvm_info_checksum(va_hvm);
 
     munmap(va_map, PAGE_SIZE);
-
-    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);
 
     return 0;
 }
@@ -200,11 +241,7 @@ static int setup_guest(int xc_handle,
     struct domain_setup_info dsi;
     uint64_t v_end;
 
-    unsigned long shared_page_frame = 0;
-    shared_iopage_t *sp;
-
-    unsigned long ioreq_buffer_frame = 0;
-    void *ioreq_buffer_page;
+    unsigned long shared_page_nr;
 
     memset(&dsi, 0, sizeof(struct domain_setup_info));
 
@@ -256,23 +293,38 @@ static int setup_guest(int xc_handle,
     /* Write the machine->phys table entries. */
     for ( count = 0; count < nr_pages; count++ )
     {
+        unsigned long gpfn_count_skip;
+
         ptr = (unsigned long long)page_array[count] << PAGE_SHIFT;
+
+        gpfn_count_skip = 0;
+
+        /*
+         * physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
+         * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
+         * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
+         */
+        if ( count >= (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) )
+            gpfn_count_skip = HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+
         if ( xc_add_mmu_update(xc_handle, mmu,
-                               ptr | MMU_MACHPHYS_UPDATE, count) )
+                               ptr | MMU_MACHPHYS_UPDATE,
+                               count + gpfn_count_skip) )
             goto error_out;
     }
 
-    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) )
+    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi) )
     {
         ERROR("Couldn't set hvm info for HVM guest.\n");
         goto error_out;
     }
 
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);
 
     if ( (e820_page = xc_map_foreign_range(
               xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-              page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
+              page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == NULL )
         goto error_out;
     memset(e820_page, 0, PAGE_SIZE);
     build_e820map(e820_page, v_end);
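
The machphys loop above displaces guest pfns past the MMIO hole: machine pages at or beyond HVM_BELOW_4G_RAM_END are mapped HVM_BELOW_4G_MMIO_LENGTH further up, which is what lets the e820 map report the excess RAM at 4GB. A compact sketch of the displacement (both constants are assumed values, for illustration only):

    /* Illustrative only: the guest pfn assigned to the count'th machine page
     * by the machphys updates above.  The constants are assumptions for this
     * example, not values quoted from the changeset. */
    #define PAGE_SHIFT                12
    #define HVM_BELOW_4G_RAM_END      0xF0000000ULL
    #define HVM_BELOW_4G_MMIO_LENGTH  (0x100000000ULL - HVM_BELOW_4G_RAM_END)

    unsigned long gpfn_for_count(unsigned long count)
    {
        unsigned long gpfn_count_skip = 0;

        if (count >= (unsigned long)(HVM_BELOW_4G_RAM_END >> PAGE_SHIFT))
            gpfn_count_skip = (unsigned long)(HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT);

        return count + gpfn_count_skip;
    }
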
@@ -281,7 +333,7 @@ static int setup_guest(int xc_handle,
     /* shared_info page starts its life empty. */
     if ( (shared_info = xc_map_foreign_range(
               xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-              shared_info_frame)) == 0 )
+              shared_info_frame)) == NULL )
         goto error_out;
     memset(shared_info, 0, PAGE_SIZE);
     /* Mask all upcalls... */
@@ -289,32 +341,25 @@ static int setup_guest(int xc_handle,
         shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
     munmap(shared_info, PAGE_SIZE);
 
+    if ( v_end > HVM_BELOW_4G_RAM_END )
+        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
+    else
+        shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
+
+    *store_mfn = page_array[shared_page_nr - 1];
+
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, *store_mfn);
+    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
+
     /* Paranoia */
-    shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
-    if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
-              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-              shared_page_frame)) == 0 )
-        goto error_out;
-    memset(sp, 0, PAGE_SIZE);
-    munmap(sp, PAGE_SIZE);
+    /* clean the shared IO requests page */
+    if ( xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr]) )
+        goto error_out;
 
     /* clean the buffered IO requests page */
-    ioreq_buffer_frame = page_array[(v_end >> PAGE_SHIFT) - 3];
-    ioreq_buffer_page = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                             PROT_READ | PROT_WRITE,
-                                             ioreq_buffer_frame);
-
-    if ( ioreq_buffer_page == NULL )
-        goto error_out;
-
-    memset(ioreq_buffer_page, 0, PAGE_SIZE);
-
-    munmap(ioreq_buffer_page, PAGE_SIZE);
-
-    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, (v_end >> PAGE_SHIFT) - 2);
-    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
-
-    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
+    if ( xc_clear_domain_page(xc_handle, dom, page_array[shared_page_nr - 2]) )
+        goto error_out;
+
     if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
         goto error_out;
 
@@ -395,6 +440,19 @@ static int xc_hvm_build_internal(int xc_
         PERROR("Could not get info on domain");
         goto error_out;
     }
+
+    /* HVM domains must be put into shadow2 mode at the start of day */
+    if ( xc_shadow_control(xc_handle, domid, DOM0_SHADOW2_CONTROL_OP_ENABLE,
+                           NULL, 0, NULL, 
+                           DOM0_SHADOW2_CONTROL_FLAG_ENABLE 
+                           | DOM0_SHADOW2_CONTROL_FLAG_REFCOUNT
+                           | DOM0_SHADOW2_CONTROL_FLAG_TRANSLATE
+                           | DOM0_SHADOW2_CONTROL_FLAG_EXTERNAL, 
+                           NULL) ) 
+    {
+        PERROR("Could not enable shadow paging for domain.\n");
+        goto error_out;
+    }        
 
     memset(ctxt, 0, sizeof(*ctxt));
 
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/libxc/xc_linux_build.c      Sun Aug 20 11:08:45 2006 -0400
@@ -972,7 +972,7 @@ static int setup_guest(int xc_handle,
         /* Enable shadow translate mode */
         if ( xc_shadow_control(xc_handle, dom,
                                DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE,
-                               NULL, 0, NULL) < 0 )
+                               NULL, 0, NULL, 0, NULL) < 0 )
         {
             PERROR("Could not enable translation mode");
             goto error_out;
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/libxc/xc_linux_save.c       Sun Aug 20 11:08:45 2006 -0400
@@ -338,13 +338,13 @@ static int analysis_phase(int xc_handle,
         int i;
 
         xc_shadow_control(xc_handle, domid, DOM0_SHADOW_CONTROL_OP_CLEAN,
-                          arr, max_pfn, NULL);
+                          arr, max_pfn, NULL, 0, NULL);
         DPRINTF("#Flush\n");
         for ( i = 0; i < 40; i++ ) {
             usleep(50000);
             now = llgettimeofday();
             xc_shadow_control(xc_handle, domid, DOM0_SHADOW_CONTROL_OP_PEEK,
-                              NULL, 0, &stats);
+                              NULL, 0, NULL, 0, &stats);
 
             DPRINTF("now= %lld faults= %" PRId32 " dirty= %" PRId32
                     " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n",
@@ -727,7 +727,7 @@ int xc_linux_save(int xc_handle, int io_
 
         if (xc_shadow_control(xc_handle, dom,
                               DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
-                              NULL, 0, NULL ) < 0) {
+                              NULL, 0, NULL, 0, NULL) < 0) {
             ERR("Couldn't enable shadow mode");
             goto out;
         }
@@ -879,7 +879,7 @@ int xc_linux_save(int xc_handle, int io_
                but this is fast enough for the moment. */
             if (!last_iter && xc_shadow_control(
                     xc_handle, dom, DOM0_SHADOW_CONTROL_OP_PEEK,
-                    to_skip, max_pfn, NULL) != max_pfn) {
+                    to_skip, max_pfn, NULL, 0, NULL) != max_pfn) {
                 ERR("Error peeking shadow bitmap");
                 goto out;
             }
@@ -1084,8 +1084,9 @@ int xc_linux_save(int xc_handle, int io_
                         (unsigned long)ctxt.user_regs.edx);
             }
 
-            if (xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_CLEAN,
-                                  to_send, max_pfn, &stats ) != max_pfn) {
+            if (xc_shadow_control(xc_handle, dom, 
+                                  DOM0_SHADOW_CONTROL_OP_CLEAN, to_send, 
+                                  max_pfn, NULL, 0, &stats) != max_pfn) {
                 ERR("Error flushing shadow PT");
                 goto out;
             }
@@ -1174,8 +1175,9 @@ int xc_linux_save(int xc_handle, int io_
  out:
 
     if (live) {
-        if(xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF,
-                             NULL, 0, NULL ) < 0) {
+        if(xc_shadow_control(xc_handle, dom, 
+                             DOM0_SHADOW_CONTROL_OP_OFF,
+                             NULL, 0, NULL, 0, NULL) < 0) {
             DPRINTF("Warning - couldn't disable shadow mode");
         }
     }
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/libxc/xenctrl.h     Sun Aug 20 11:08:45 2006 -0400
@@ -323,6 +323,8 @@ int xc_shadow_control(int xc_handle,
                       unsigned int sop,
                       unsigned long *dirty_bitmap,
                       unsigned long pages,
+                      unsigned long *mb,
+                      uint32_t mode,
                       xc_shadow_control_stats_t *stats);
 
 int xc_bvtsched_global_set(int xc_handle,
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/misc/xc_shadow.c
--- a/tools/misc/xc_shadow.c    Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/misc/xc_shadow.c    Sun Aug 20 11:08:45 2006 -0400
@@ -60,6 +60,8 @@ int main(int argc, char *argv[])
                            mode, 
                            NULL,
                            0,
+                           NULL,
+                           0,
                            NULL) < 0 )
     {    
         fprintf(stderr, "Error reseting performance counters: %d (%s)\n",
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/python/xen/lowlevel/xc/xc.c Sun Aug 20 11:08:45 2006 -0400
@@ -672,6 +672,59 @@ static PyObject *pyxc_sedf_domain_get(Xc
                          "weight",    weight);
 }
 
+static PyObject *pyxc_shadow_control(PyObject *self,
+                                     PyObject *args,
+                                     PyObject *kwds)
+{
+    XcObject *xc = (XcObject *)self;
+
+    uint32_t dom;
+    int op=0;
+
+    static char *kwd_list[] = { "dom", "op", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|i", kwd_list, 
+                                      &dom, &op) )
+        return NULL;
+    
+    if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0, NULL, 0, NULL) 
+         < 0 )
+        return PyErr_SetFromErrno(xc_error);
+    
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_shadow_mem_control(PyObject *self,
+                                         PyObject *args,
+                                         PyObject *kwds)
+{
+    XcObject *xc = (XcObject *)self;
+    int op;
+    uint32_t dom;
+    int mbarg = -1;
+    unsigned long mb;
+
+    static char *kwd_list[] = { "dom", "mb", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|i", kwd_list, 
+                                      &dom, &mbarg) )
+        return NULL;
+    
+    if ( mbarg < 0 ) 
+        op = DOM0_SHADOW2_CONTROL_OP_GET_ALLOCATION;
+    else 
+    {
+        mb = mbarg;
+        op = DOM0_SHADOW2_CONTROL_OP_SET_ALLOCATION;
+    }
+    if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0, &mb, 0, NULL) < 0 )
+        return PyErr_SetFromErrno(xc_error);
+    
+    mbarg = mb;
+    return Py_BuildValue("i", mbarg);
+}
+
 static PyObject *pyxc_sched_credit_domain_set(XcObject *self,
                                               PyObject *args,
                                               PyObject *kwds)
@@ -1121,6 +1174,22 @@ static PyMethodDef pyxc_methods[] = {
       "Get information about the Xen host\n"
       "Returns [dict]: information about Xen"
       "        [None]: on failure.\n" },
+
+    { "shadow_control", 
+      (PyCFunction)pyxc_shadow_control, 
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Set parameter for shadow pagetable interface\n"
+      " dom [int]:   Identifier of domain.\n"
+      " op [int, 0]: operation\n\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "shadow_mem_control", 
+      (PyCFunction)pyxc_shadow_mem_control, 
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Set or read shadow pagetable memory use\n"
+      " dom [int]:   Identifier of domain.\n"
+      " mb [int, -1]: MB of shadow memory this domain should have.\n\n"
+      "Returns: [int] MB of shadow memory in use by this domain.\n" },
 
     { "domain_setmaxmem", 
       (PyCFunction)pyxc_domain_setmaxmem, 
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/python/xen/xend/XendDomain.py       Sun Aug 20 11:08:45 2006 -0400
@@ -532,6 +532,30 @@ class XendDomain:
         except Exception, ex:
             raise XendError(str(ex))
 
+    def domain_shadow_control(self, domid, op):
+        """Shadow page control."""
+        dominfo = self.domain_lookup(domid)
+        try:
+            return xc.shadow_control(dominfo.getDomid(), op)
+        except Exception, ex:
+            raise XendError(str(ex))
+
+    def domain_shadow_mem_get(self, domid):
+        """Get shadow pagetable memory allocation."""
+        dominfo = self.domain_lookup(domid)
+        try:
+            return xc.shadow_mem_control(dominfo.getDomid())
+        except Exception, ex:
+            raise XendError(str(ex))
+
+    def domain_shadow_mem_set(self, domid, mb):
+        """Set shadow pagetable memory allocation."""
+        dominfo = self.domain_lookup(domid)
+        try:
+            return xc.shadow_mem_control(dominfo.getDomid(), mb=mb)
+        except Exception, ex:
+            raise XendError(str(ex))
+
     def domain_sched_credit_get(self, domid):
         """Get credit scheduler parameters for a domain.
         """
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/python/xen/xend/XendDomainInfo.py   Sun Aug 20 11:08:45 2006 -0400
@@ -30,6 +30,7 @@ import time
 import time
 import threading
 import os
+import math
 
 import xen.lowlevel.xc
 from xen.util import asserts
@@ -126,16 +127,17 @@ VM_CONFIG_PARAMS = [
 # don't come out of xc in the same form as they are specified in the config
 # file, so those are handled separately.
 ROUNDTRIPPING_CONFIG_ENTRIES = [
-    ('uuid',       str),
-    ('vcpus',      int),
-    ('vcpu_avail', int),
-    ('cpu_weight', float),
-    ('memory',     int),
-    ('maxmem',     int),
-    ('bootloader', str),
+    ('uuid',            str),
+    ('vcpus',           int),
+    ('vcpu_avail',      int),
+    ('cpu_weight',      float),
+    ('memory',          int),
+    ('shadow_memory',   int),
+    ('maxmem',          int),
+    ('bootloader',      str),
     ('bootloader_args', str),
-    ('features', str),
-    ('localtime', int),
+    ('features',        str),
+    ('localtime',       int),
     ]
 
 ROUNDTRIPPING_CONFIG_ENTRIES += VM_CONFIG_PARAMS
@@ -146,12 +148,13 @@ ROUNDTRIPPING_CONFIG_ENTRIES += VM_CONFI
 # entries written to the store that cannot be reconfigured on-the-fly.
 #
 VM_STORE_ENTRIES = [
-    ('uuid',       str),
-    ('vcpus',      int),
-    ('vcpu_avail', int),
-    ('memory',     int),
-    ('maxmem',     int),
-    ('start_time', float),
+    ('uuid',          str),
+    ('vcpus',         int),
+    ('vcpu_avail',    int),
+    ('memory',        int),
+    ('shadow_memory', int),
+    ('maxmem',        int),
+    ('start_time',    float),
     ]
 
 VM_STORE_ENTRIES += VM_CONFIG_PARAMS
@@ -572,6 +575,7 @@ class XendDomainInfo:
             defaultInfo('vcpu_avail',   lambda: (1 << self.info['vcpus']) - 1)
 
             defaultInfo('memory',       lambda: 0)
+            defaultInfo('shadow_memory', lambda: 0)
             defaultInfo('maxmem',       lambda: 0)
             defaultInfo('bootloader',   lambda: None)
             defaultInfo('bootloader_args', lambda: None)            
@@ -1276,14 +1280,34 @@ class XendDomainInfo:
                 for v in range(0, self.info['max_vcpu_id']+1):
                     xc.vcpu_setaffinity(self.domid, v, self.info['cpus'])
 
-            # set memory limit
-            maxmem = self.image.getRequiredMemory(self.info['maxmem'] * 1024)
-            xc.domain_setmaxmem(self.domid, maxmem)
-
-            # initial memory allocation
-            mem_kb = self.image.getRequiredMemory(self.info['memory'] * 1024)
-            balloon.free(mem_kb)
-            xc.domain_memory_increase_reservation(self.domid, mem_kb, 0, 0)
+            # set domain maxmem in KiB
+            xc.domain_setmaxmem(self.domid, self.info['maxmem'] * 1024)
+
+            m = self.image.getDomainMemory(self.info['memory'] * 1024)
+
+            # get the domain's shadow memory requirement
+            sm = int(math.ceil(self.image.getDomainShadowMemory(m) / 1024.0))
+            if self.info['shadow_memory'] > sm:
+                sm = self.info['shadow_memory']
+
+            # Make sure there's enough RAM available for the domain
+            balloon.free(m + sm * 1024)
+
+            # Set up the shadow memory
+            sm = xc.shadow_mem_control(self.domid, mb=sm)
+            self.info['shadow_memory'] = sm
+
+            init_reservation = self.info['memory'] * 1024
+            if os.uname()[4] in ('ia64', 'ppc64'):
+                # Workaround for architectures that don't yet support
+                # ballooning.
+                init_reservation = m
+                # Following line from xiantao.zhang@xxxxxxxxx
+                # Needed for IA64 until it supports ballooning -- okay for PPC64?
+                xc.domain_setmaxmem(self.domid, m)
+
+            xc.domain_memory_increase_reservation(self.domid, init_reservation,
+                                                  0, 0)
 
             self.createChannels()
 
@@ -1518,13 +1542,12 @@ class XendDomainInfo:
         return self.getDeviceController(dev_type).sxpr(devid)
 
 
-    def device_configure(self, dev_config, devid):
+    def device_configure(self, dev_config):
         """Configure an existing device.
         @param dev_config: device configuration
-        @param devid:      device id
         """
         deviceClass = sxp.name(dev_config)
-        self.reconfigureDevice(deviceClass, devid, dev_config)
+        self.reconfigureDevice(deviceClass, None, dev_config)
 
 
     def pause(self):
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/XendLogging.py
--- a/tools/python/xen/xend/XendLogging.py      Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/python/xen/xend/XendLogging.py      Sun Aug 20 11:08:45 2006 -0400
@@ -57,7 +57,7 @@ class XendRotatingFileHandler(logging.ha
         self.setCloseOnExec()
 
     def doRollover(self):
-        logging.handlers.RotatingFileHandler.doRollover()
+        logging.handlers.RotatingFileHandler.doRollover(self)
         self.setCloseOnExec()
 
     # NB yes accessing 'self.stream' violates OO encapsulation somewhat,
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/python/xen/xend/image.py    Sun Aug 20 11:08:45 2006 -0400
@@ -145,6 +145,12 @@ class ImageHandler:
 
     def getRequiredMemory(self, mem_kb):
         return mem_kb
+
+    def getDomainShadowMemory(self, mem_kb):
+        """@return The minimum shadow memory required, in KiB, for a domain 
+        with mem_kb KiB of RAM."""
+        # PV domains don't need any shadow memory
+        return 0
 
     def buildDomain(self):
         """Build the domain. Define in subclass."""
@@ -372,6 +378,32 @@ class HVMImageHandler(ImageHandler):
         os.waitpid(self.pid, 0)
         self.pid = 0
 
+    def getDomainMemory(self, mem_kb):
+        """@see ImageHandler.getDomainMemory"""
+        if os.uname()[4] == 'ia64':
+            page_kb = 16
+            # ROM size for guest firmware, ioreq page and xenstore page
+            extra_pages = 1024 + 2
+        else:
+            page_kb = 4
+            # This was derived empirically:
+            #   2.4 MB overhead per 1024 MB RAM + 8 MB constant
+            #   + 4 to avoid low-memory condition
+            extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12;
+            extra_pages = int( math.ceil( extra_mb*1024 / page_kb ))
+        return mem_kb + extra_pages * page_kb
+
+    def getDomainShadowMemory(self, mem_kb):
+        """@return The minimum shadow memory required, in KiB, for a domain 
+        with mem_kb KiB of RAM."""
+        if os.uname()[4] in ('ia64', 'ppc64'):
+            # Explicit shadow memory is not a concept 
+            return 0
+        else:
+            # 1MB per vcpu plus 4KiB/MiB of RAM.  This is higher than
+            # the minimum that Xen would allocate if no value were given.
+            return 1024 * self.vm.getVCpuCount() + mem_kb / 256
+
     def register_shutdown_watch(self):
         """ add xen store watch on control/shutdown """
         self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown", \
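
The HVM shadow sizing rule added above (1MB per vcpu plus 4KiB per MiB of RAM) is easy to sanity-check by hand; a tiny C restatement of the same arithmetic, purely as a worked example:

    /* Illustrative only: the HVM shadow-memory heuristic above, restated in C.
     * e.g. 2 vcpus and 1GB of RAM: 2*1024 + 1048576/256 = 6144 KiB (6MB). */
    unsigned long shadow_kb(unsigned long vcpus, unsigned long mem_kb)
    {
        return 1024 * vcpus + mem_kb / 256;
    }
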
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py     Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/python/xen/xend/server/DevController.py     Sun Aug 20 11:08:45 2006 -0400
@@ -206,15 +206,9 @@ class DevController:
         """
 
         devid = int(devid)
-        
-        frontpath = self.frontendPath(devid)
-        backpath = xstransact.Read(frontpath, "backend")
-
-        if backpath:
-            xstransact.Write(backpath, 'state', str(xenbusState['Closing']))
-        else:
-            raise VmError("Device %s not connected" % devid)
-           
+
+        self.writeBackend(devid, 'state', str(xenbusState['Closing']))
+
 
     def configurations(self):
         return map(self.configuration, self.deviceIDs())
@@ -355,6 +349,16 @@ class DevController:
             return map(int, xstransact.List(fe))
 
 
+    def writeBackend(self, devid, *args):
+        frontpath = self.frontendPath(devid)
+        backpath = xstransact.Read(frontpath, "backend")
+
+        if backpath:
+            xstransact.Write(backpath, *args)
+        else:
+            raise VmError("Device %s not connected" % devid)
+
+
 ## private:
 
     def addStoreEntries(self, config, devid, backDetails, frontDetails):
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/server/XMLRPCServer.py
--- a/tools/python/xen/xend/server/XMLRPCServer.py      Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/python/xen/xend/server/XMLRPCServer.py      Sun Aug 20 11:08:45 2006 -0400
@@ -24,6 +24,7 @@ from xen.util.xmlrpclib2 import UnixXMLR
 
 from xen.xend.XendClient import XML_RPC_SOCKET, ERROR_INVALID_DOMAIN
 from xen.xend.XendError import *
+from xen.xend.XendLogging import log
 from types import ListType
 
 def lookup(domid):
@@ -74,7 +75,8 @@ def get_log():
     finally:
         f.close()
 
-methods = ['device_create', 'destroyDevice', 'getDeviceSxprs',
+methods = ['device_create', 'device_configure', 'destroyDevice',
+           'getDeviceSxprs',
            'setMemoryTarget', 'setName', 'setVCpuCount', 'shutdown',
            'send_sysrq', 'getVCPUInfo', 'waitForDevices']
 
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/python/xen/xend/server/blkif.py     Sun Aug 20 11:08:45 2006 -0400
@@ -13,7 +13,7 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
-# Copyright (C) 2005 XenSource Ltd
+# Copyright (C) 2005, 2006 XenSource Inc.
 #============================================================================
 
 
@@ -76,6 +76,23 @@ class BlkifController(DevController):
         return (devid, back, front)
 
 
+    def reconfigureDevice(self, _, config):
+        """@see DevController.reconfigureDevice"""
+        (devid, new_back, new_front) = self.getDeviceDetails(config)
+
+        (dev, mode) = self.readBackend(devid, 'dev', 'mode')
+        dev_type = self.readFrontend(devid, 'device-type')
+
+        if (dev_type == 'cdrom' and new_front['device-type'] == 'cdrom' and
+            dev == new_back['dev'] and mode == 'r'):
+            self.writeBackend(devid,
+                              'type', new_back['type'],
+                              'params', new_back['params'])
+        else:
+            raise VmError('Refusing to reconfigure device %s:%d to %s' %
+                          (self.deviceClass, devid, config))
+
+
     def configuration(self, devid):
         """@see DevController.configuration"""
 
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/python/xen/xm/create.py     Sun Aug 20 11:08:45 2006 -0400
@@ -157,6 +157,10 @@ gopts.var('maxmem', val='MEMORY',
 gopts.var('maxmem', val='MEMORY',
           fn=set_int, default=None,
           use="Maximum domain memory in MB.")
+
+gopts.var('shadow_memory', val='MEMORY',
+          fn=set_int, default=0,
+          use="Domain shadow memory in MB.")
 
 gopts.var('cpu', val='CPU',
           fn=set_int, default=None,
@@ -666,8 +670,9 @@ def make_config(vals):
             if v:
                 config.append([n, v])
 
-    map(add_conf, ['name', 'memory', 'maxmem', 'restart', 'on_poweroff',
-                   'on_reboot', 'on_crash', 'vcpus', 'features'])
+    map(add_conf, ['name', 'memory', 'maxmem', 'shadow_memory',
+                   'restart', 'on_poweroff', 'on_reboot', 'on_crash',
+                   'vcpus', 'features'])
 
     if vals.uuid is not None:
         config.append(['uuid', vals.uuid])
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/python/xen/xm/main.py       Sun Aug 20 11:08:45 2006 -0400
@@ -113,6 +113,8 @@ block_detach_help = """block-detach  <Do
                                     or the device name as mounted in the guest"""
 
 block_list_help = "block-list <DomId> [--long]      List virtual block devices for a domain"
+block_configure_help = """block-configure <DomId> <BackDev> <FrontDev> <Mode>
+                   [BackDomId] Change block device configuration"""
 network_attach_help = """network-attach  <DomID> [script=<script>] [ip=<ip>] [mac=<mac>]
                            [bridge=<bridge>] [backend=<backDomID>]
                                     Create a new virtual network device """
@@ -199,6 +201,7 @@ device_commands = [
     "block-attach",
     "block-detach",
     "block-list",
+    "block-configure",
     "network-attach",
     "network-detach",
     "network-list",
@@ -1055,9 +1058,8 @@ def xm_vtpm_list(args):
                    "%(be-path)-30s  "
                    % ni)
 
-def xm_block_attach(args):
-    arg_check(args, 'block-attach', 4, 5)
-
+
+def parse_block_configuration(args):
     dom = args[0]
 
     if args[1].startswith('tap:'):
@@ -1087,7 +1089,21 @@ def xm_block_attach(args):
         traceback.print_exc(limit=1)
         sys.exit(1)
 
+    return (dom, vbd)
+
+
+def xm_block_attach(args):
+    arg_check(args, 'block-attach', 4, 5)
+
+    (dom, vbd) = parse_block_configuration(args)
     server.xend.domain.device_create(dom, vbd)
+
+
+def xm_block_configure(args):
+    arg_check(args, 'block-configure', 4, 5)
+
+    (dom, vbd) = parse_block_configuration(args)
+    server.xend.domain.device_configure(dom, vbd)
 
 
 def xm_network_attach(args):
@@ -1201,6 +1217,7 @@ commands = {
     "block-attach": xm_block_attach,
     "block-detach": xm_block_detach,
     "block-list": xm_block_list,
+    "block-configure": xm_block_configure,
     # network
     "network-attach": xm_network_attach,
     "network-detach": xm_network_detach,
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/xenmon/Makefile
--- a/tools/xenmon/Makefile     Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/xenmon/Makefile     Sun Aug 20 11:08:45 2006 -0400
@@ -25,7 +25,7 @@ CFLAGS  += -I $(XEN_LIBXC)
 CFLAGS  += -I $(XEN_LIBXC)
 LDFLAGS += -L $(XEN_LIBXC)
 
-BIN = setmask xenbaked
+BIN = xentrace_setmask xenbaked
 SCRIPTS = xenmon.py
 
 .PHONY: all
@@ -35,10 +35,10 @@ build: $(BIN)
 build: $(BIN)
 
 .PHONY: install
-install: xenbaked setmask
+install: build
        [ -d $(DESTDIR)$(sbindir) ] || $(INSTALL_DIR) $(DESTDIR)$(sbindir)
        $(INSTALL_PROG) xenbaked $(DESTDIR)$(sbindir)/xenbaked
-       $(INSTALL_PROG) setmask  $(DESTDIR)$(sbindir)/setmask
+       $(INSTALL_PROG) xentrace_setmask  $(DESTDIR)$(sbindir)/xentrace_setmask
        $(INSTALL_PROG) xenmon.py  $(DESTDIR)$(sbindir)/xenmon.py
 
 .PHONY: clean
@@ -48,5 +48,5 @@ clean:
 
 %: %.c Makefile
        $(CC) $(CFLAGS) $(LDFLAGS) -lxenctrl -o $@ $<
-
-
+xentrace_%: %.c Makefile
+       $(CC) $(CFLAGS) $(LDFLAGS) -lxenctrl -o $@ $<
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/xenstore/Makefile   Sun Aug 20 11:08:45 2006 -0400
@@ -98,7 +98,7 @@ libxenstore.so.$(MAJOR).$(MINOR): xs.opi
        $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenstore.so.$(MAJOR) -shared -o $@ $^ -lpthread
 
 libxenstore.a: xs.o xs_lib.o
-       ar rcs libxenstore.a $^
+       $(AR) rcs libxenstore.a $^
 
 .PHONY: clean
 clean: testsuite-clean
diff -r 96d6f9cfed6e -r 4cffec02b478 tools/xentrace/Makefile
--- a/tools/xentrace/Makefile   Sun Aug 20 11:07:52 2006 -0400
+++ b/tools/xentrace/Makefile   Sun Aug 20 11:08:45 2006 -0400
@@ -14,7 +14,7 @@ HDRS     = $(wildcard *.h)
 HDRS     = $(wildcard *.h)
 OBJS     = $(patsubst %.c,%.o,$(wildcard *.c))
 
-BIN      = xentrace setsize
+BIN      = xentrace xentrace_setsize
 LIBBIN   = 
 SCRIPTS  = xentrace_format
 MAN1     = $(wildcard *.1)
@@ -58,3 +58,5 @@ clean:
 
 %: %.c $(HDRS) Makefile
        $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxenctrl
+xentrace_%: %.c $(HDRS) Makefile
+       $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxenctrl
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/Rules.mk
--- a/xen/Rules.mk      Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/Rules.mk      Sun Aug 20 11:08:45 2006 -0400
@@ -8,6 +8,9 @@ perfc_arrays?= n
 perfc_arrays?= n
 crash_debug ?= n
 
+XEN_ROOT=$(BASEDIR)/..
+include $(XEN_ROOT)/Config.mk
+
 # Hardcoded configuration implications and dependencies.
 # Do this is a neater way if it becomes unwieldy.
 ifeq ($(debug),y)
@@ -16,9 +19,6 @@ ifeq ($(perfc_arrays),y)
 ifeq ($(perfc_arrays),y)
 perfc := y
 endif
-
-XEN_ROOT=$(BASEDIR)/..
-include $(XEN_ROOT)/Config.mk
 
 # Set ARCH/SUBARCH appropriately.
 override COMPILE_SUBARCH := $(XEN_COMPILE_ARCH)
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/acm/acm_core.c
--- a/xen/acm/acm_core.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/acm/acm_core.c        Sun Aug 20 11:08:45 2006 -0400
@@ -206,7 +206,7 @@ acm_setup(unsigned int *initrdidx,
     for (i = mbi->mods_count-1; i >= 1; i--)
     {
         struct acm_policy_buffer *pol;
-        char *_policy_start; 
+        char *_policy_start;
         unsigned long _policy_len;
 #if defined(__i386__)
        _policy_start = (char *)(initial_images_start + (mod[i].mod_start-mod[0].mod_start));
@@ -342,7 +342,7 @@ acm_init_domain_ssid(domid_t id, ssidref
     {
         printk("%s: ERROR instantiating individual ssids for domain 0x%02x.\n",
                __func__, subj->domain_id);
-        acm_free_domain_ssid(ssid); 
+        acm_free_domain_ssid(ssid);
         put_domain(subj);
         return ACM_INIT_SSID_ERROR;
     }
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/acm/acm_simple_type_enforcement_hooks.c
--- a/xen/acm/acm_simple_type_enforcement_hooks.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/acm/acm_simple_type_enforcement_hooks.c       Sun Aug 20 11:08:45 2006 -0400
@@ -86,10 +86,10 @@ int acm_init_ste_policy(void)
 
     /* init stats */
     atomic_set(&(ste_bin_pol.ec_eval_count), 0);
-    atomic_set(&(ste_bin_pol.ec_denied_count), 0); 
+    atomic_set(&(ste_bin_pol.ec_denied_count), 0);
     atomic_set(&(ste_bin_pol.ec_cachehit_count), 0);
     atomic_set(&(ste_bin_pol.gt_eval_count), 0);
-    atomic_set(&(ste_bin_pol.gt_denied_count), 0); 
+    atomic_set(&(ste_bin_pol.gt_denied_count), 0);
     atomic_set(&(ste_bin_pol.gt_cachehit_count), 0);
     return ACM_OK;
 }
@@ -100,7 +100,7 @@ ste_init_domain_ssid(void **ste_ssid, ss
 ste_init_domain_ssid(void **ste_ssid, ssidref_t ssidref)
 {
     int i;
-    struct ste_ssid *ste_ssidp = xmalloc(struct ste_ssid); 
+    struct ste_ssid *ste_ssidp = xmalloc(struct ste_ssid);
     traceprintk("%s.\n", __func__);
 
     if (ste_ssidp == NULL)
@@ -309,7 +309,7 @@ ste_set_policy(u8 *buf, u32 buf_size)
            sizeof(domaintype_t),
            ste_buf->ste_max_ssidrefs*ste_buf->ste_max_types);
 
-    /* 2. now re-calculate sharing decisions based on running domains; 
+    /* 2. now re-calculate sharing decisions based on running domains;
     *    this can fail if new policy is conflicting with sharing of running domains
     *    now: reject violating new policy; future: adjust sharing through revoking sharing */
     if (ste_init_state(ste_buf, (domaintype_t *)ssidrefsbuf)) {
@@ -349,11 +349,11 @@ ste_dump_stats(u8 *buf, u16 buf_len)
     stats.ec_eval_count = htonl(atomic_read(&ste_bin_pol.ec_eval_count));
     stats.gt_eval_count = htonl(atomic_read(&ste_bin_pol.gt_eval_count));
     stats.ec_denied_count = htonl(atomic_read(&ste_bin_pol.ec_denied_count));
-    stats.gt_denied_count = htonl(atomic_read(&ste_bin_pol.gt_denied_count)); 
+    stats.gt_denied_count = htonl(atomic_read(&ste_bin_pol.gt_denied_count));
     stats.ec_cachehit_count = htonl(atomic_read(&ste_bin_pol.ec_cachehit_count));
     stats.gt_cachehit_count = htonl(atomic_read(&ste_bin_pol.gt_cachehit_count));
 
-    if (buf_len < sizeof(struct acm_ste_stats_buffer))
+    if (buf_len < sizeof(struct acm_ste_stats_buffer))
         return -ENOMEM;
 
     memcpy(buf, &stats, sizeof(struct acm_ste_stats_buffer));
@@ -523,8 +523,8 @@ ste_pre_eventchannel_unbound(domid_t id1
         cache_result(subj, obj);
         ret = ACM_ACCESS_PERMITTED;
     } else {
-        atomic_inc(&ste_bin_pol.ec_denied_count); 
-        ret = ACM_ACCESS_DENIED; 
+        atomic_inc(&ste_bin_pol.ec_denied_count);
+        ret = ACM_ACCESS_DENIED;
     }
   out:
     if (obj != NULL)
@@ -569,8 +569,8 @@ ste_pre_eventchannel_interdomain(domid_t
         cache_result(subj, obj);
         ret = ACM_ACCESS_PERMITTED;
     } else {
-        atomic_inc(&ste_bin_pol.ec_denied_count); 
-        ret = ACM_ACCESS_DENIED; 
+        atomic_inc(&ste_bin_pol.ec_denied_count);
+        ret = ACM_ACCESS_DENIED;
     }
  out:
     if (obj != NULL)
@@ -599,9 +599,9 @@ ste_pre_grant_map_ref (domid_t id) {
         cache_result(subj, obj);
         ret = ACM_ACCESS_PERMITTED;
     } else {
-        atomic_inc(&ste_bin_pol.gt_denied_count); 
+        atomic_inc(&ste_bin_pol.gt_denied_count);
         printkd("%s: ACCESS DENIED!\n", __func__);
-        ret = ACM_ACCESS_DENIED; 
+        ret = ACM_ACCESS_DENIED;
     }
     if (obj != NULL)
         put_domain(obj);
@@ -637,8 +637,8 @@ ste_pre_grant_setup (domid_t id) {
         cache_result(subj, obj);
         ret = ACM_ACCESS_PERMITTED;
     } else {
-        atomic_inc(&ste_bin_pol.gt_denied_count); 
-        ret = ACM_ACCESS_DENIED; 
+        atomic_inc(&ste_bin_pol.gt_denied_count);
+        ret = ACM_ACCESS_DENIED;
     }
     if (obj != NULL)
         put_domain(obj);
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/ia64/Rules.mk
--- a/xen/arch/ia64/Rules.mk    Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/ia64/Rules.mk    Sun Aug 20 11:08:45 2006 -0400
@@ -2,6 +2,7 @@
 # ia64-specific definitions
 
 HAS_ACPI := y
+HAS_VGA  := y
 VALIDATE_VT    ?= n
 no_warns ?= n
 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/ia64/xen/domain.c        Sun Aug 20 11:08:45 2006 -0400
@@ -864,6 +864,7 @@ int construct_dom0(struct domain *d,
 {
        int i, rc;
        start_info_t *si;
+       dom0_vga_console_info_t *ci;
        struct vcpu *v = d->vcpu[0];
        unsigned long max_pages;
 
@@ -1000,6 +1001,9 @@ int construct_dom0(struct domain *d,
        //if ( initrd_len != 0 )
        //    memcpy((void *)vinitrd_start, initrd_start, initrd_len);
 
+       BUILD_BUG_ON(sizeof(start_info_t) + sizeof(dom0_vga_console_info_t) +
+                    sizeof(struct ia64_boot_param) > PAGE_SIZE);
+
        /* Set up start info area. */
        d->shared_info->arch.start_info_pfn = pstart_info >> PAGE_SHIFT;
        start_info_page = assign_new_domain_page(d, pstart_info);
@@ -1034,7 +1038,8 @@ int construct_dom0(struct domain *d,
        strncpy((char *)si->cmd_line, dom0_command_line, sizeof(si->cmd_line));
        si->cmd_line[sizeof(si->cmd_line)-1] = 0;
 
-       bp = (struct ia64_boot_param *)(si + 1);
+       bp = (struct ia64_boot_param *)((unsigned char *)si +
+                                       sizeof(start_info_t));
        bp->command_line = pstart_info + offsetof (start_info_t, cmd_line);
 
        /* We assume console has reached the last line!  */
@@ -1048,6 +1053,16 @@ int construct_dom0(struct domain *d,
                     (PAGE_ALIGN(ia64_boot_param->initrd_size) + 4*1024*1024);
        bp->initrd_size = ia64_boot_param->initrd_size;
 
+       ci = (dom0_vga_console_info_t *)((unsigned char *)si +
+                                        sizeof(start_info_t) +
+                                        sizeof(struct ia64_boot_param));
+
+       if (fill_console_start_info(ci)) {
+               si->console.dom0.info_off = sizeof(start_info_t) +
+                                           sizeof(struct ia64_boot_param);
+               si->console.dom0.info_size = sizeof(dom0_vga_console_info_t);
+       }
+
        vcpu_init_regs (v);
 
        vcpu_regs(v)->r28 = bp_mpa;
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/ia64/xen/mm.c    Sun Aug 20 11:08:45 2006 -0400
@@ -1746,6 +1746,11 @@ int get_page_type(struct page_info *page
     return 1;
 }
 
+int memory_is_conventional_ram(paddr_t p)
+{
+    return (efi_mem_type(p) == EFI_CONVENTIONAL_MEMORY);
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/Makefile     Sun Aug 20 11:08:45 2006 -0400
@@ -8,7 +8,6 @@ subdir-$(x86_64) += x86_64
 subdir-$(x86_64) += x86_64
 
 obj-y += apic.o
-obj-y += audit.o
 obj-y += bitops.o
 obj-y += compat.o
 obj-y += delay.o
@@ -41,12 +40,21 @@ obj-y += x86_emulate.o
 obj-y += x86_emulate.o
 
 ifneq ($(pae),n)
-obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o
+obj-$(x86_32) += shadow2-common.o shadow2_g2_on_s3.o shadow2_g3_on_s3.o
 else
-obj-$(x86_32) += shadow32.o
+obj-$(x86_32) += shadow2-common.o shadow2_g2_on_s2.o
 endif
 
-obj-$(x86_64) += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o
+obj-$(x86_64) += shadow2-common.o shadow2_g4_on_s4.o shadow2_g3_on_s3.o \
+                 shadow2_g2_on_s3.o
+
+guest_levels  = $(subst g,,$(filter g%,$(subst ., ,$(subst _, ,$(subst shadow2_,,$(1))))))
+shadow_levels = $(subst s,,$(filter s%,$(subst ., ,$(subst _, ,$(subst shadow2_,,$(1))))))
+shadow2_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1)) \
+                -DSHADOW_PAGING_LEVELS=$(call shadow_levels,$(1))
+
+shadow2_%.o: shadow2.c $(HDRS) Makefile
+       $(CC) $(CFLAGS) $(call shadow2_defns,$(@F)) -c $< -o $@
 
 obj-$(crash_debug) += gdbstub.o
 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/Rules.mk
--- a/xen/arch/x86/Rules.mk     Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/Rules.mk     Sun Aug 20 11:08:45 2006 -0400
@@ -2,6 +2,7 @@
 # x86-specific definitions
 
 HAS_ACPI := y
+HAS_VGA  := y
 
 #
 # If you change any of these configuration options then you must
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/acpi/boot.c
--- a/xen/arch/x86/acpi/boot.c  Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/acpi/boot.c  Sun Aug 20 11:08:45 2006 -0400
@@ -107,7 +107,7 @@ char *__acpi_map_table(unsigned long phy
        int idx;
 
        if (phys + size < 8 * 1024 * 1024) 
-               return __va(phys); 
+               return __va(phys);
 
        offset = phys & (PAGE_SIZE - 1);
        mapped_size = PAGE_SIZE - offset;
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/apic.c       Sun Aug 20 11:08:45 2006 -0400
@@ -10,8 +10,8 @@
  *                  thanks to Eric Gilmore
  *                  and Rolf G. Tews
  *                  for testing these extensively.
- *     Maciej W. Rozycki       :       Various updates and fixes.
- *     Mikael Pettersson       :       Power Management for UP-APIC.
+ *    Maciej W. Rozycki :   Various updates and fixes.
+ *    Mikael Pettersson :   Power Management for UP-APIC.
  *    Pavel Machek and
  *    Mikael Pettersson    :    PM converted to driver model.
  */
@@ -166,7 +166,7 @@ void clear_local_APIC(void)
         apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
 #endif
     v = GET_APIC_VERSION(apic_read(APIC_LVR));
-    if (APIC_INTEGRATED(v)) {  /* !82489DX */
+    if (APIC_INTEGRATED(v)) {  /* !82489DX */
         if (maxlvt > 3)        /* Due to Pentium errata 3AP and 11AP. */
             apic_write(APIC_ESR, 0);
         apic_read(APIC_ESR);
@@ -878,9 +878,9 @@ int __init calibrate_APIC_clock(void)
                     ((long)(t2-t1)/LOOPS)%(1000000/HZ));
 
     apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
-               "%ld.%04ld MHz.\n",
-               result/(1000000/HZ),
-               result%(1000000/HZ));
+                "%ld.%04ld MHz.\n",
+                result/(1000000/HZ),
+                result%(1000000/HZ));
 
     /* set up multipliers for accurate timer code */
     bus_freq   = result*HZ;
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/boot/x86_32.S        Sun Aug 20 11:08:45 2006 -0400
@@ -7,22 +7,22 @@
 
 #define  SECONDARY_CPU_FLAG 0xA5A5A5A5
                 
-               .text
+        .text
 
 ENTRY(start)
 ENTRY(stext)
 ENTRY(_stext)
         jmp __start
 
-        .align 4
+        .align 4
 
 /*** MULTIBOOT HEADER ****/
         /* Magic number indicating a Multiboot header. */
-       .long   0x1BADB002
-       /* Flags to bootloader (see Multiboot spec). */
-       .long   0x00000003
-       /* Checksum: must be the negated sum of the first two fields. */
-       .long   -0x1BADB005
+        .long 0x1BADB002
+        /* Flags to bootloader (see Multiboot spec). */
+        .long 0x00000003
+        /* Checksum: must be the negated sum of the first two fields. */
+        .long -0x1BADB005
         
 not_multiboot_msg:
         .asciz "ERR: Not a Multiboot bootloader!"
@@ -57,8 +57,8 @@ 1:      lss     stack_start-__PAGE_OFFSE
         add     $(STACK_SIZE-CPUINFO_sizeof-__PAGE_OFFSET),%esp
 
         /* Reset EFLAGS (subsumes CLI and CLD). */
-       pushl   $0
-       popf
+        pushl   $0
+        popf
 
         /* Set up FPU. */
         fninit
@@ -172,7 +172,7 @@ 1:      /* Paging enabled, so we can now
         je      start_secondary
 
         /* Call into main C routine. This should never return.*/
-               call    __start_xen
+        call    __start_xen
         ud2     /* Force a panic (invalid opcode). */
 
 /* This is the default interrupt handler. */
@@ -203,19 +203,19 @@ ENTRY(stack_start)
         
         .word   0    
 idt_descr:
-        .word  256*8-1
+        .word   256*8-1
 idt:
-        .long  idt_table
+        .long   idt_table
 
         .word   0
 gdt_descr:
-        .word  LAST_RESERVED_GDT_BYTE
+        .word   LAST_RESERVED_GDT_BYTE
 gdt:
         .long   gdt_table - FIRST_RESERVED_GDT_BYTE
 
         .word   0
 nopaging_gdt_descr:
-        .word  LAST_RESERVED_GDT_BYTE
+        .word   LAST_RESERVED_GDT_BYTE
         .long   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
         
         .org 0x1000
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/cpu/amd.c    Sun Aug 20 11:08:45 2006 -0400
@@ -280,7 +280,7 @@ static void __init init_amd(struct cpuin
                set_bit(X86_FEATURE_K8, c->x86_capability);
                break;
        case 6:
-               set_bit(X86_FEATURE_K7, c->x86_capability); 
+               set_bit(X86_FEATURE_K7, c->x86_capability);
                break;
        }
 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/cpu/cyrix.c
--- a/xen/arch/x86/cpu/cyrix.c  Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/cpu/cyrix.c  Sun Aug 20 11:08:45 2006 -0400
@@ -145,7 +145,7 @@ static void __init set_cx86_inc(void)
        setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
        /* PCR0 -- Performance Control */
        /* Incrementor Margin 10 */
-       setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); 
+       setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
        setCx86(CX86_CCR3, ccr3);       /* disable MAPEN */
 }
 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/cpu/transmeta.c
--- a/xen/arch/x86/cpu/transmeta.c      Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/cpu/transmeta.c      Sun Aug 20 11:08:45 2006 -0400
@@ -19,7 +19,7 @@ static void __init init_transmeta(struct
        max = cpuid_eax(0x80860000);
        cpu_rev = 0;
        if ( max >= 0x80860001 ) {
-               cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); 
+               cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
                if (cpu_rev != 0x02000000) {
                        printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
                                (cpu_rev >> 24) & 0xff,
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/delay.c
--- a/xen/arch/x86/delay.c      Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/delay.c      Sun Aug 20 11:08:45 2006 -0400
@@ -1,13 +1,13 @@
 /*
- *     Precise Delay Loops for i386
+ * Precise Delay Loops for i386
  *
- *     Copyright (C) 1993 Linus Torvalds
- *     Copyright (C) 1997 Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx>
+ * Copyright (C) 1993 Linus Torvalds
+ * Copyright (C) 1997 Martin Mares <mj@xxxxxxxxxxxxxxxxxxxxxxxx>
  *
- *     The __delay function must _NOT_ be inlined as its execution time
- *     depends wildly on alignment on many x86 processors. The additional
- *     jump magic is needed to get the timing stable on all the CPU's
- *     we have to worry about.
+ * The __delay function must _NOT_ be inlined as its execution time
+ * depends wildly on alignment on many x86 processors. The additional
+ * jump magic is needed to get the timing stable on all the CPU's
+ * we have to worry about.
  */
 
 #include <xen/config.h>
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/dmi_scan.c
--- a/xen/arch/x86/dmi_scan.c   Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/dmi_scan.c   Sun Aug 20 11:08:45 2006 -0400
@@ -199,11 +199,11 @@ static __init __attribute__((unused)) in
 static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d) 
 { 
        if (!acpi_force) { 
-               printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); 
+               printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
                disable_acpi();
        } else { 
                printk(KERN_NOTICE 
-                      "Warning: DMI blacklist says broken, but acpi forced\n"); 
+                      "Warning: DMI blacklist says broken, but acpi forced\n");
        }
        return 0;
 } 
@@ -214,12 +214,12 @@ static __init __attribute__((unused)) in
 static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d) 
 { 
        if (!acpi_force) { 
-               printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); 
+               printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
                disable_acpi();
-               acpi_ht = 1; 
+               acpi_ht = 1;
        } else { 
                printk(KERN_NOTICE 
-                      "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); 
+                      "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
        }
        return 0;
 } 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/dom0_ops.c   Sun Aug 20 11:08:45 2006 -0400
@@ -84,12 +84,12 @@ long arch_do_dom0_op(struct dom0_op *op,
 
     case DOM0_SHADOW_CONTROL:
     {
-        struct domain *d; 
+        struct domain *d;
         ret = -ESRCH;
         d = find_domain_by_id(op->u.shadow_control.domain);
         if ( d != NULL )
         {
-            ret = shadow_mode_control(d, &op->u.shadow_control);
+            ret = shadow2_control_op(d, &op->u.shadow_control, u_dom0_op);
             put_domain(d);
             copy_to_guest(u_dom0_op, op, 1);
         } 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/domain.c     Sun Aug 20 11:08:45 2006 -0400
@@ -134,13 +134,6 @@ struct vcpu *alloc_vcpu_struct(struct do
     v->arch.perdomain_ptes =
         d->arch.mm_perdomain_pt + (vcpu_id << GDT_LDT_VCPU_SHIFT);
 
-    v->arch.guest_vtable  = __linear_l2_table;
-    v->arch.shadow_vtable = __shadow_linear_l2_table;
-#if defined(__x86_64__)
-    v->arch.guest_vl3table = __linear_l3_table;
-    v->arch.guest_vl4table = __linear_l4_table;
-#endif
-
     pae_l3_cache_init(&v->arch.pae_l3_cache);
 
     return v;
@@ -155,9 +148,7 @@ int arch_domain_create(struct domain *d)
 {
     l1_pgentry_t gdt_l1e;
     int vcpuid, pdpt_order;
-#ifdef __x86_64__
     int i;
-#endif
 
     pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
     d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
@@ -202,8 +193,12 @@ int arch_domain_create(struct domain *d)
 
 #endif /* __x86_64__ */
 
-    shadow_lock_init(d);
-    INIT_LIST_HEAD(&d->arch.free_shadow_frames);
+    shadow2_lock_init(d);
+    for ( i = 0; i <= SHADOW2_MAX_ORDER; i++ )
+        INIT_LIST_HEAD(&d->arch.shadow2_freelists[i]);
+    INIT_LIST_HEAD(&d->arch.shadow2_p2m_freelist);
+    INIT_LIST_HEAD(&d->arch.shadow2_p2m_inuse);
+    INIT_LIST_HEAD(&d->arch.shadow2_toplevel_shadows);
 
     if ( !is_idle_domain(d) )
     {
@@ -234,6 +229,8 @@ int arch_domain_create(struct domain *d)
 
 void arch_domain_destroy(struct domain *d)
 {
+    shadow2_final_teardown(d);
+
     free_xenheap_pages(
         d->arch.mm_perdomain_pt,
         get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t)));
@@ -328,31 +325,35 @@ int arch_set_info_guest(
         if ( !hvm_initialize_guest_resources(v) )
             return -EINVAL;
     }
-    else if ( shadow_mode_refcounts(d) )
-    {
-        if ( !get_page(mfn_to_page(cr3_pfn), d) )
+    else
+    {
+        if ( !get_page_and_type(mfn_to_page(cr3_pfn), d,
+                                PGT_base_page_table) )
         {
             destroy_gdt(v);
             return -EINVAL;
         }
-    }
-    else
-    {
-        if ( !get_page_and_type(mfn_to_page(cr3_pfn), d,
-                                PGT_base_page_table) )
-        {
-            destroy_gdt(v);
-            return -EINVAL;
-        }
-    }
-
-    update_pagetables(v);
+    }    
+
+    /* Shadow2: make sure the domain has enough shadow memory to
+     * boot another vcpu */
+    if ( shadow2_mode_enabled(d) 
+         && d->arch.shadow2_total_pages < shadow2_min_acceptable_pages(d) )
+    {
+        destroy_gdt(v);
+        return -ENOMEM;
+    }
 
     if ( v->vcpu_id == 0 )
         update_domain_wallclock_time(d);
 
     /* Don't redo final setup */
     set_bit(_VCPUF_initialised, &v->vcpu_flags);
+
+    if ( shadow2_mode_enabled(d) )
+        shadow2_update_paging_modes(v);
+
+    update_cr3(v);
 
     return 0;
 }
@@ -555,7 +556,8 @@ static void load_segments(struct vcpu *n
             n->vcpu_info->evtchn_upcall_mask = 1;
 
         regs->entry_vector  = TRAP_syscall;
-        regs->rflags       &= 0xFFFCBEFFUL;
+        regs->rflags       &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
+                                X86_EFLAGS_NT|X86_EFLAGS_TF);
         regs->ss            = __GUEST_SS;
         regs->rsp           = (unsigned long)(rsp-11);
         regs->cs            = __GUEST_CS;
@@ -669,7 +671,6 @@ static void __context_switch(void)
             loaddebug(&n->arch.guest_context, 6);
             loaddebug(&n->arch.guest_context, 7);
         }
-
         n->arch.ctxt_switch_to(n);
     }
 
@@ -927,29 +928,34 @@ void domain_relinquish_resources(struct 
     /* Drop the in-use references to page-table bases. */
     for_each_vcpu ( d, v )
     {
-        if ( (pfn = pagetable_get_pfn(v->arch.guest_table)) != 0 )
-        {
-            if ( !shadow_mode_refcounts(d) )
-                put_page_type(mfn_to_page(pfn));
-            put_page(mfn_to_page(pfn));
-
+        /* Drop ref to guest_table (from new_guest_cr3(), svm/vmx cr3 handling,
+         * or sh2_update_paging_modes()) */
+        pfn = pagetable_get_pfn(v->arch.guest_table);
+        if ( pfn != 0 )
+        {
+            if ( shadow2_mode_refcounts(d) )
+                put_page(mfn_to_page(pfn));
+            else
+                put_page_and_type(mfn_to_page(pfn));
             v->arch.guest_table = pagetable_null();
         }
 
-        if ( (pfn = pagetable_get_pfn(v->arch.guest_table_user)) != 0 )
-        {
-            if ( !shadow_mode_refcounts(d) )
-                put_page_type(mfn_to_page(pfn));
-            put_page(mfn_to_page(pfn));
-
+#ifdef __x86_64__
+        /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */
+        pfn = pagetable_get_pfn(v->arch.guest_table_user);
+        if ( pfn != 0 )
+        {
+            put_page_and_type(mfn_to_page(pfn));
             v->arch.guest_table_user = pagetable_null();
         }
+#endif
     }
 
     if ( d->vcpu[0] && hvm_guest(d->vcpu[0]) )
         hvm_relinquish_guest_resources(d);
 
-    shadow_mode_disable(d);
+    /* Tear down shadow mode stuff. */
+    shadow2_teardown(d);
 
     /*
      * Relinquish GDT mappings. No need for explicit unmapping of the LDT as
@@ -964,26 +970,23 @@ void domain_relinquish_resources(struct 
 
     /* Free page used by xen oprofile buffer */
     free_xenoprof_pages(d);
-
 }
 
 void arch_dump_domain_info(struct domain *d)
 {
-    if ( shadow_mode_enabled(d) )
-    {
-        printk("    shadow mode: ");
-        if ( shadow_mode_refcounts(d) )
+    if ( shadow2_mode_enabled(d) )
+    {
+        printk("    shadow2 mode: ");
+        if ( d->arch.shadow2_mode & SHM2_enable )
+            printk("enabled ");
+        if ( shadow2_mode_refcounts(d) )
             printk("refcounts ");
-        if ( shadow_mode_write_all(d) )
-            printk("write_all ");
-        if ( shadow_mode_log_dirty(d) )
+        if ( shadow2_mode_log_dirty(d) )
             printk("log_dirty ");
-        if ( shadow_mode_translate(d) )
+        if ( shadow2_mode_translate(d) )
             printk("translate ");
-        if ( shadow_mode_external(d) )
+        if ( shadow2_mode_external(d) )
             printk("external ");
-        if ( shadow_mode_wr_pt_pte(d) )
-            printk("wr_pt_pte ");
         printk("\n");
     }
 }
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/domain_build.c       Sun Aug 20 11:08:45 2006 -0400
@@ -119,7 +119,7 @@ static void process_dom0_ioports_disable
                    "in dom0_ioports_disable, skipping\n", t);
             continue;
         }
-       
+
         if ( *u == '\0' )
             io_to = io_from;
         else if ( *u == '-' )
@@ -469,7 +469,7 @@ int construct_dom0(struct domain *d,
     {
         if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
         {
-            l1start = l1tab = (l1_pgentry_t *)mpt_alloc; 
+            l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
             mpt_alloc += PAGE_SIZE;
             *l2tab = l2e_from_paddr((unsigned long)l1start, L2_PROT);
             l2tab++;
@@ -661,7 +661,7 @@ int construct_dom0(struct domain *d,
             if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) )
             {
                 if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) )
-                    l3start = l3tab = l4e_to_l3e(*++l4tab); 
+                    l3start = l3tab = l4e_to_l3e(*++l4tab);
                 l2start = l2tab = l3e_to_l2e(*l3tab);
             }
             l1start = l1tab = l2e_to_l1e(*l2tab);
@@ -683,8 +683,11 @@ int construct_dom0(struct domain *d,
     for ( i = 1; i < opt_dom0_max_vcpus; i++ )
         (void)alloc_vcpu(d, i, i);
 
-    /* Set up monitor table */
-    update_pagetables(v);
+    /* Set up CR3 value for write_ptbase */
+    if ( shadow2_mode_enabled(v->domain) )
+        shadow2_update_paging_modes(v);
+    else
+        update_cr3(v);
 
     /* Install the new page tables. */
     local_irq_disable();
@@ -796,10 +799,8 @@ int construct_dom0(struct domain *d,
     new_thread(v, dsi.v_kernentry, vstack_end, vstartinfo_start);
 
     if ( opt_dom0_shadow )
-    {
-        shadow_mode_enable(d, SHM_enable);
-        update_pagetables(v);
-    }
+        if ( shadow2_test_enable(d) == 0 ) 
+            shadow2_update_paging_modes(v);
 
     if ( supervisor_mode_kernel )
     {
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/extable.c
--- a/xen/arch/x86/extable.c    Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/extable.c    Sun Aug 20 11:08:45 2006 -0400
@@ -41,8 +41,8 @@ void sort_exception_tables(void)
 
 static inline unsigned long
 search_one_table(const struct exception_table_entry *first,
-                const struct exception_table_entry *last,
-                unsigned long value)
+                 const struct exception_table_entry *last,
+                 unsigned long value)
 {
     const struct exception_table_entry *mid;
     long diff;
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/genapic/bigsmp.c
--- a/xen/arch/x86/genapic/bigsmp.c     Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/genapic/bigsmp.c     Sun Aug 20 11:08:45 2006 -0400
@@ -41,7 +41,7 @@ static __init int probe_bigsmp(void)
                dmi_bigsmp = 1;
        else
                dmi_check_system(bigsmp_dmi_table);
-       return dmi_bigsmp; 
+       return dmi_bigsmp;
 } 
 
 struct genapic apic_bigsmp = {
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/genapic/es7000.h
--- a/xen/arch/x86/genapic/es7000.h     Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/genapic/es7000.h     Sun Aug 20 11:08:45 2006 -0400
@@ -71,8 +71,8 @@ struct es7000_mem_info {
        unsigned char type;   
        unsigned char length;
        unsigned char resv[6];
-       unsigned long long  start; 
-       unsigned long long  size; 
+       unsigned long long  start;
+       unsigned long long  size;
 };
 
 struct es7000_oem_table {
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/genapic/probe.c
--- a/xen/arch/x86/genapic/probe.c      Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/genapic/probe.c      Sun Aug 20 11:08:45 2006 -0400
@@ -65,7 +65,7 @@ void __init generic_apic_probe(void)
        for (i = 0; !changed && apic_probe[i]; i++) { 
                if (apic_probe[i]->probe()) {
                        changed = 1;
-                       genapic = apic_probe[i]; 
+                       genapic = apic_probe[i];
                } 
        }
        if (!changed) 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/hvm.c    Sun Aug 20 11:08:45 2006 -0400
@@ -30,6 +30,7 @@
 #include <xen/hypercall.h>
 #include <xen/guest_access.h>
 #include <xen/event.h>
+#include <xen/shadow.h>
 #include <asm/current.h>
 #include <asm/e820.h>
 #include <asm/io.h>
@@ -42,10 +43,6 @@
 #include <asm/spinlock.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
-#include <asm/shadow.h>
-#if CONFIG_PAGING_LEVELS >= 3
-#include <asm/shadow_64.h>
-#endif
 #include <public/sched.h>
 #include <public/hvm/ioreq.h>
 #include <public/version.h>
@@ -61,7 +58,7 @@ static void hvm_zap_mmio_range(
 static void hvm_zap_mmio_range(
     struct domain *d, unsigned long pfn, unsigned long nr_pfn)
 {
-    unsigned long i, val = INVALID_MFN;
+    unsigned long i;
 
     ASSERT(d == current->domain);
 
@@ -70,7 +67,8 @@ static void hvm_zap_mmio_range(
         if ( pfn + i >= 0xfffff )
             break;
 
-        __copy_to_user(&phys_to_machine_mapping[pfn + i], &val, sizeof (val));
+        if ( VALID_MFN(gmfn_to_mfn(d, pfn + i)) )
+            guest_remove_page(d, pfn + i);
     }
 }
 
@@ -262,11 +260,13 @@ void hvm_setup_platform(struct domain* d
     if ( !hvm_guest(v) || (v->vcpu_id != 0) )
         return;
 
+#if 0 /* SHADOW2 does not have this */
     if ( shadow_direct_map_init(d) == 0 )
     {
         printk("Can not allocate shadow direct map for HVM domain.\n");
         domain_crash_synchronous();
     }
+#endif
 
     hvm_zap_iommu_pages(d);
 
@@ -345,11 +345,44 @@ int cpu_get_interrupt(struct vcpu *v, in
     return -1;
 }
 
+#include <asm/hvm/vmx/vmx.h>
+void hvm_hlt(unsigned long rflags)
+{
+    struct vcpu *v = current;
+    struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm;
+    s_time_t next_pit = -1, next_wakeup;
+
+    /*
+     * Detect machine shutdown.  Only do this for vcpu 0, to avoid potentially 
+     * shutting down the domain early. If we halt with interrupts disabled, 
+     * that's a pretty sure sign that we want to shut down.  In a real 
+     * processor, NMIs are the only way to break out of this.
+     */
+    if ( (v->vcpu_id == 0) && !(rflags & X86_EFLAGS_IF) )
+    {
+        printk("D%d: HLT with interrupts disabled -- shutting down.\n",
+               current->domain->domain_id);
+        domain_shutdown(current->domain, SHUTDOWN_poweroff);
+        return;
+    }
+
+    if ( !v->vcpu_id )
+        next_pit = get_scheduled(v, pt->irq, pt);
+    next_wakeup = get_apictime_scheduled(v);
+    if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
+        next_wakeup = next_pit;
+    if ( next_wakeup != - 1 ) 
+        set_timer(&current->arch.hvm_vcpu.hlt_timer, next_wakeup);
+    do_sched_op_compat(SCHEDOP_block, 0);
+}
+
 /*
  * Copy from/to guest virtual.
  */
 int hvm_copy(void *buf, unsigned long vaddr, int size, int dir)
 {
+    struct vcpu *v = current;
+    unsigned long gfn;
     unsigned long mfn;
     char *addr;
     int count;
@@ -359,10 +392,9 @@ int hvm_copy(void *buf, unsigned long va
         if (count > size)
             count = size;
 
-        if (hvm_paging_enabled(current))
-            mfn = gva_to_mfn(vaddr);
-        else
-            mfn = get_mfn_from_gpfn(vaddr >> PAGE_SHIFT);
+        gfn = shadow2_gva_to_gfn(v, vaddr);
+        mfn = mfn_x(sh2_vcpu_gfn_to_mfn(v, gfn));
+
         if (mfn == INVALID_MFN)
             return 0;
 
@@ -393,12 +425,12 @@ void hvm_print_line(struct vcpu *v, cons
 
     if (*index == HVM_PBUF_SIZE-2 || c == '\n') {
         if (*index == HVM_PBUF_SIZE-2)
-           pbuf[(*index)++] = c;
+            pbuf[(*index)++] = c;
         pbuf[*index] = '\0';
         printk("(GUEST: %u) %s\n", v->domain->domain_id, pbuf);
-       *index = 0;
+        *index = 0;
     } else
-       pbuf[(*index)++] = c;
+        pbuf[(*index)++] = c;
 }
 
 typedef unsigned long hvm_hypercall_t(
@@ -515,7 +547,7 @@ void hvm_do_hypercall(struct cpu_user_re
         return;
     }
 
-    if ( current->domain->arch.ops->guest_paging_levels == PAGING_L4 )
+    if ( current->arch.shadow2->guest_levels == 4 )
     {
         pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi,
                                                        pregs->rsi,
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/i8259.c
--- a/xen/arch/x86/hvm/i8259.c  Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/i8259.c  Sun Aug 20 11:08:45 2006 -0400
@@ -57,7 +57,7 @@ static inline void pic_set_irq1(PicState
         if (level) {
             if ((s->last_irr & mask) == 0) {
                 s->irr |= mask;
-           }
+            }
             s->last_irr |= mask;
         } else {
             s->last_irr &= ~mask;
@@ -237,7 +237,7 @@ static void update_shared_irr(struct hvm
     BUG_ON(!spin_is_locked(&s->lock));
 
     get_sp(current->domain)->sp_global.pic_elcr = 
-               s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
+        s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
     pl =(uint8_t*)&get_sp(current->domain)->sp_global.pic_last_irr;
     pe =(uint8_t*)&get_sp(current->domain)->sp_global.pic_elcr;
     if ( c == &s->pics[0] ) {
@@ -550,7 +550,7 @@ static int intercept_elcr_io(ioreq_t *p)
         spin_lock_irqsave(&s->lock, flags);
         elcr_ioport_write((void*)&s->pics[p->addr&1],
                 (uint32_t) p->addr, (uint32_t)( data & 0xff));
-       get_sp(current->domain)->sp_global.pic_elcr = 
+        get_sp(current->domain)->sp_global.pic_elcr = 
             s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
         spin_unlock_irqrestore(&s->lock, flags);
     }
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c      Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/intercept.c      Sun Aug 20 11:08:45 2006 -0400
@@ -284,7 +284,7 @@ static __inline__ void missed_ticks(stru
 
     missed_ticks = NOW() - pt->scheduled;
     if ( missed_ticks > 0 ) {
-       missed_ticks = missed_ticks / (s_time_t) pt->period + 1;
+        missed_ticks = missed_ticks / (s_time_t) pt->period + 1;
         if ( missed_ticks > 1000 ) {
             /* TODO: Adjust guest time togther */
             pt->pending_intr_nr++;
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/platform.c       Sun Aug 20 11:08:45 2006 -0400
@@ -21,7 +21,7 @@
 #include <xen/config.h>
 #include <xen/types.h>
 #include <xen/mm.h>
-#include <asm/shadow.h>
+#include <xen/shadow.h>
 #include <xen/domain_page.h>
 #include <asm/page.h>
 #include <xen/event.h>
@@ -35,9 +35,6 @@
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <asm/current.h>
-#if CONFIG_PAGING_LEVELS >= 3
-#include <asm/shadow_64.h>
-#endif
 
 #define DECODE_success  1
 #define DECODE_failure  0
@@ -724,7 +721,7 @@ void send_pio_req(struct cpu_user_regs *
 
     if (pvalid) {
         if (hvm_paging_enabled(current))
-            p->u.pdata = (void *) gva_to_gpa(value);
+            p->u.data = shadow2_gva_to_gpa(current, value);
         else
             p->u.pdata = (void *) value; /* guest VA == guest PA */
     } else
@@ -774,7 +771,7 @@ void send_mmio_req(
 
     if (pvalid) {
         if (hvm_paging_enabled(v))
-            p->u.pdata = (void *) gva_to_gpa(value);
+            p->u.data = shadow2_gva_to_gpa(v, value);
         else
             p->u.pdata = (void *) value; /* guest VA == guest PA */
     } else
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/svm/instrlen.c
--- a/xen/arch/x86/hvm/svm/instrlen.c   Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/svm/instrlen.c   Sun Aug 20 11:08:45 2006 -0400
@@ -464,7 +464,7 @@ done_prefixes:
             case 4: insn_fetch(int32_t, 4, _regs.eip, length); break;
             }
             goto done;
-       }
+        }
         break;
     }
 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/svm/svm.c        Sun Aug 20 11:08:45 2006 -0400
@@ -26,9 +26,10 @@
 #include <xen/irq.h>
 #include <xen/softirq.h>
 #include <xen/hypercall.h>
+#include <xen/domain_page.h>
 #include <asm/current.h>
 #include <asm/io.h>
-#include <asm/shadow.h>
+#include <asm/shadow2.h>
 #include <asm/regs.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
@@ -43,10 +44,6 @@
 #include <asm/hvm/svm/emulate.h>
 #include <asm/hvm/svm/vmmcall.h>
 #include <asm/hvm/svm/intr.h>
-#include <asm/shadow.h>
-#if CONFIG_PAGING_LEVELS >= 3
-#include <asm/shadow_64.h>
-#endif
 #include <public/sched.h>
 
 #define SVM_EXTRA_DEBUG
@@ -61,7 +58,7 @@ extern int inst_copy_from_guest(unsigned
                                 int inst_len);
 extern asmlinkage void do_IRQ(struct cpu_user_regs *);
 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
-       unsigned long count, int size, long value, int dir, int pvalid);
+                         unsigned long count, int size, long value, int dir, int pvalid);
 extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
 extern void svm_dump_inst(unsigned long eip);
 extern int svm_dbg_on;
@@ -69,7 +66,7 @@ void svm_dump_regs(const char *from, str
 
 static void svm_relinquish_guest_resources(struct domain *d);
 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
-        struct cpu_user_regs *regs);
+                                            struct cpu_user_regs *regs);
 
 /* va of hardware host save area     */
 static void *hsa[NR_CPUS] __read_mostly;
@@ -110,7 +107,7 @@ void asidpool_init(int core)
     /* Host ASID is always in use */
     per_cpu(asid_pool,core).asid[INITIAL_ASID] = ASID_INUSE;
     for ( i = 1; i < ASID_MAX; i++ )
-       per_cpu(asid_pool,core).asid[i] = ASID_AVAILABLE;
+        per_cpu(asid_pool,core).asid[i] = ASID_AVAILABLE;
 }
 
 
@@ -142,7 +139,7 @@ static int asidpool_fetch_next(struct vm
  *                           available.
  */
 int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
-                             int oldcore, int newcore )
+                          int oldcore, int newcore )
 {
     int i;
     int res = 1;
@@ -150,8 +147,8 @@ int asidpool_assign_next( struct vmcb_st
 
     spin_lock(&per_cpu(asid_pool,oldcore).asid_lock);
     if( retire_current && vmcb->guest_asid ) {
-       per_cpu(asid_pool,oldcore).asid[vmcb->guest_asid & (ASID_MAX-1)] = 
-           ASID_RETIRED;
+        per_cpu(asid_pool,oldcore).asid[vmcb->guest_asid & (ASID_MAX-1)] = 
+            ASID_RETIRED;
     }
     spin_unlock(&per_cpu(asid_pool,oldcore).asid_lock);
     spin_lock(&per_cpu(asid_pool,newcore).asid_lock);
@@ -174,12 +171,12 @@ int asidpool_assign_next( struct vmcb_st
 
 void asidpool_retire( struct vmcb_struct *vmcb, int core )
 {
-   spin_lock(&per_cpu(asid_pool,core).asid_lock);
-   if( vmcb->guest_asid ) {
-       per_cpu(asid_pool,core).asid[vmcb->guest_asid & (ASID_MAX-1)] = 
-           ASID_RETIRED;
-   }
-   spin_unlock(&per_cpu(asid_pool,core).asid_lock);
+    spin_lock(&per_cpu(asid_pool,core).asid_lock);
+    if( vmcb->guest_asid ) {
+        per_cpu(asid_pool,core).asid[vmcb->guest_asid & (ASID_MAX-1)] = 
+            ASID_RETIRED;
+    }
+    spin_unlock(&per_cpu(asid_pool,core).asid_lock);
 }
 
 static inline void svm_inject_exception(struct vcpu *v, int trap, 
@@ -289,26 +286,26 @@ static inline int long_mode_do_msr_read(
         break;
 
     case MSR_STAR:
-         msr_content = vmcb->star;
-         break;
+        msr_content = vmcb->star;
+        break;
  
     case MSR_LSTAR:
-         msr_content = vmcb->lstar;
-         break;
+        msr_content = vmcb->lstar;
+        break;
  
     case MSR_CSTAR:
-         msr_content = vmcb->cstar;
-         break;
+        msr_content = vmcb->cstar;
+        break;
  
     case MSR_SYSCALL_MASK:
-         msr_content = vmcb->sfmask;
-         break;
+        msr_content = vmcb->sfmask;
+        break;
     default:
         return 0;
     }
 
     HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n", 
-            msr_content);
+                msr_content);
 
     regs->eax = msr_content & 0xffffffff;
     regs->edx = msr_content >> 32;
@@ -381,24 +378,24 @@ static inline int long_mode_do_msr_write
         break;
 
     case MSR_SHADOW_GS_BASE:
-         vmcb->kerngsbase = msr_content;
-         break;
+        vmcb->kerngsbase = msr_content;
+        break;
  
     case MSR_STAR:
-         vmcb->star = msr_content;
-         break;
+        vmcb->star = msr_content;
+        break;
  
     case MSR_LSTAR:
-         vmcb->lstar = msr_content;
-         break;
+        vmcb->lstar = msr_content;
+        break;
  
     case MSR_CSTAR:
-         vmcb->cstar = msr_content;
-         break;
+        vmcb->cstar = msr_content;
+        break;
  
     case MSR_SYSCALL_MASK:
-         vmcb->sfmask = msr_content;
-         break;
+        vmcb->sfmask = msr_content;
+        break;
 
     default:
         return 0;
@@ -414,7 +411,7 @@ static int svm_realmode(struct vcpu *v)
     return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
 }
 
-static int svm_instruction_length(struct vcpu *v)
+int svm_guest_x86_mode(struct vcpu *v)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
@@ -423,10 +420,20 @@ static int svm_instruction_length(struct
         mode = vmcb->cs.attributes.fields.l ? 8 : 4;
     else
         mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
-    return svm_instrlen(guest_cpu_user_regs(), mode);
-}
-
-static unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
+    return mode;
+}
+
+int svm_instruction_length(struct vcpu *v)
+{
+    return svm_instrlen(guest_cpu_user_regs(), svm_guest_x86_mode(v));
+}
+
+void svm_update_host_cr3(struct vcpu *v)
+{
+    /* SVM doesn't have a HOST_CR3 equivalent to update. */
+}
+
+unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
 {
     switch ( num )
     {
@@ -436,6 +443,8 @@ static unsigned long svm_get_ctrl_reg(st
         return v->arch.hvm_svm.cpu_cr2;
     case 3:
         return v->arch.hvm_svm.cpu_cr3;
+    case 4:
+        return v->arch.hvm_svm.cpu_shadow_cr4;
     default:
         BUG();
     }
@@ -524,8 +533,6 @@ static void svm_init_hypercall_page(stru
     /* Don't support HYPERVISOR_iret at the moment */
     *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
 }
-
-
 
 
 int svm_dbg_on = 0;
@@ -574,9 +581,9 @@ static inline int svm_do_debugout(unsign
 
 #if 0
     if ((exit_code == 0x4E 
-                || exit_code == VMEXIT_CR0_READ 
-                || exit_code == VMEXIT_CR0_WRITE) 
-            && counter < 200000)
+         || exit_code == VMEXIT_CR0_READ 
+         || exit_code == VMEXIT_CR0_WRITE) 
+        && counter < 200000)
         return 0;
 
     if ((exit_code == 0x4E) && counter < 500000)
@@ -647,6 +654,11 @@ static void svm_load_cpu_guest_regs(
     svm_load_cpu_user_regs(v, regs);
 }
 
+int svm_long_mode_enabled(struct vcpu *v)
+{
+    return SVM_LONG_GUEST(v);
+}
+
 
 
 static void arch_svm_do_launch(struct vcpu *v) 
@@ -676,18 +688,18 @@ static void arch_svm_do_launch(struct vc
 #endif
     if (v->vcpu_id != 0) 
     {
-       u16     cs_sel = regs->cs;
-       /*
+        u16 cs_sel = regs->cs;
+        /*
          * This is the launch of an AP; set state so that we begin executing
-        * the trampoline code in real-mode.
+         * the trampoline code in real-mode.
          */
-       svm_do_vmmcall_reset_to_realmode(v, regs);      
-       /* Adjust the state to execute the trampoline code.*/
-       v->arch.hvm_svm.vmcb->rip = 0;
-       v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
-       v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
-    }
-       
+        svm_do_vmmcall_reset_to_realmode(v, regs);  
+        /* Adjust the state to execute the trampoline code.*/
+        v->arch.hvm_svm.vmcb->rip = 0;
+        v->arch.hvm_svm.vmcb->cs.sel= cs_sel;
+        v->arch.hvm_svm.vmcb->cs.base = (cs_sel << 4);
+    }
+      
     reset_stack_and_jump(svm_asm_do_launch);
 }
 
@@ -726,7 +738,6 @@ static void svm_final_setup_guest(struct
 static void svm_final_setup_guest(struct vcpu *v)
 {
     struct domain *d = v->domain;
-    struct vcpu *vc;
 
     v->arch.schedule_tail    = arch_svm_do_launch;
     v->arch.ctxt_switch_from = svm_ctxt_switch_from;
@@ -735,9 +746,12 @@ static void svm_final_setup_guest(struct
     if ( v != d->vcpu[0] )
         return;
 
-    /* Initialize monitor page table */
-    for_each_vcpu( d, vc )
-        vc->arch.monitor_table = pagetable_null();
+    if ( !shadow2_mode_external(d) )
+    {
+        DPRINTK("Can't init HVM for dom %u vcpu %u: "
+                "not in shadow2 external mode\n", d->domain_id, v->vcpu_id);
+        domain_crash(d);
+    }
 
     /* 
      * Required to do this once per domain
@@ -745,13 +759,6 @@ static void svm_final_setup_guest(struct
      */
     memset(&d->shared_info->evtchn_mask[0], 0xff, 
            sizeof(d->shared_info->evtchn_mask));       
-
-    /* 
-     * Put the domain in shadow mode even though we're going to be using
-     * the shared 1:1 page table initially. It shouldn't hurt 
-     */
-    shadow_mode_enable(d, SHM_enable|SHM_refcounts|
-                       SHM_translate|SHM_external|SHM_wr_pt_pte);
 }
 
 
@@ -769,7 +776,7 @@ int start_svm(void)
     u64 phys_hsa;
     int cpu = smp_processor_id();
  
-   /* Xen does not fill x86_capability words except 0. */
+    /* Xen does not fill x86_capability words except 0. */
     ecx = cpuid_ecx(0x80000001);
     boot_cpu_data.x86_capability[5] = ecx;
     
@@ -809,9 +816,13 @@ int start_svm(void)
 
     hvm_funcs.realmode = svm_realmode;
     hvm_funcs.paging_enabled = svm_paging_enabled;
+    hvm_funcs.long_mode_enabled = svm_long_mode_enabled;
+    hvm_funcs.guest_x86_mode = svm_guest_x86_mode;
     hvm_funcs.instruction_length = svm_instruction_length;
     hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
 
+    hvm_funcs.update_host_cr3 = svm_update_host_cr3;
+    
     hvm_funcs.stts = svm_stts;
     hvm_funcs.set_tsc_offset = svm_set_tsc_offset;
 
@@ -834,8 +845,7 @@ static void svm_relinquish_guest_resourc
             continue;
 
         destroy_vmcb(&v->arch.hvm_svm);
-        free_monitor_pagetable(v);
-        kill_timer(&v->arch.hvm_svm.hlt_timer);
+        kill_timer(&v->arch.hvm_vcpu.hlt_timer);
         if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) ) 
         {
             kill_timer( &(VLAPIC(v)->vlapic_timer) );
@@ -851,8 +861,6 @@ static void svm_relinquish_guest_resourc
 
     if ( d->arch.hvm_domain.buffered_io_va )
         unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va);
-
-    shadow_direct_map_clean(d);
 }
 
 
@@ -863,7 +871,7 @@ static void svm_migrate_timers(struct vc
 
     if ( pt->enabled ) {
         migrate_timer( &pt->timer, v->processor );
-        migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
+        migrate_timer( &v->arch.hvm_vcpu.hlt_timer, v->processor );
     }
     if ( hvm_apic_support(v->domain) && VLAPIC( v ))
         migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
@@ -880,7 +888,7 @@ void arch_svm_do_resume(struct vcpu *v)
     else {
         if (svm_dbg_on)
             printk("VCPU core pinned: %d to %d\n", 
-                v->arch.hvm_svm.launch_core, smp_processor_id() );
+                   v->arch.hvm_svm.launch_core, smp_processor_id() );
         v->arch.hvm_svm.launch_core = smp_processor_id();
         svm_migrate_timers( v );
         hvm_do_resume( v );
@@ -894,7 +902,6 @@ static int svm_do_page_fault(unsigned lo
 {
     struct vcpu *v = current;
     unsigned long eip;
-    unsigned long gpa; /* FIXME: PAE */
     int result;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
@@ -903,47 +910,11 @@ static int svm_do_page_fault(unsigned lo
 //#if HVM_DEBUG
     eip = vmcb->rip;
     HVM_DBG_LOG(DBG_LEVEL_VMMU, 
-            "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
-            va, eip, (unsigned long)regs->error_code);
+                "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
+                va, eip, (unsigned long)regs->error_code);
 //#endif
 
-    if ( !svm_paging_enabled(v) )
-    {
-        if ( shadow_direct_map_fault(va, regs) ) 
-            return 1;
-
-        handle_mmio(va, va);
-        return 1;
-    }
-
-
-    gpa = gva_to_gpa(va);
-
-    /* Use 1:1 page table to identify MMIO address space */
-    if (mmio_space(gpa))
-    {
-        /* No support for APIC */
-        if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
-        { 
-            int inst_len;
-            inst_len = svm_instruction_length(v);
-            if (inst_len == -1)
-            {
-                printf("%s: INST_LEN - Unable to decode properly\n", __func__);
-                domain_crash_synchronous();
-            }
-
-            __update_guest_eip(vmcb, inst_len);
-
-            return 1;
-        }
-
-        handle_mmio(va, gpa);
-
-        return 1;
-    }
-    
-    result = shadow_fault(va, regs);
+    result = shadow2_fault(va, regs); 
 
     if( result ) {
         /* Let's make sure that the Guest TLB is flushed */
@@ -967,7 +938,7 @@ static void svm_do_no_device_fault(struc
 
 
 static void svm_do_general_protection_fault(struct vcpu *v, 
-        struct cpu_user_regs *regs) 
+                                            struct cpu_user_regs *regs) 
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     unsigned long eip, error_code;
@@ -981,7 +952,7 @@ static void svm_do_general_protection_fa
         printf("Huh? We got a GP Fault with an invalid IDTR!\n");
         svm_dump_vmcb(__func__, vmcb);
         svm_dump_regs(__func__, regs);
-        svm_dump_inst(vmcb->rip); 
+        svm_dump_inst(vmcb->rip);
         __hvm_bug(regs);
     }
 
@@ -990,10 +961,10 @@ static void svm_do_general_protection_fa
                 eip, error_code);
 
     HVM_DBG_LOG(DBG_LEVEL_1, 
-            "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
-            (unsigned long)regs->eax, (unsigned long)regs->ebx,
-            (unsigned long)regs->ecx, (unsigned long)regs->edx,
-            (unsigned long)regs->esi, (unsigned long)regs->edi);
+                "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
+                (unsigned long)regs->eax, (unsigned long)regs->ebx,
+                (unsigned long)regs->ecx, (unsigned long)regs->edx,
+                (unsigned long)regs->esi, (unsigned long)regs->edi);
       
     /* Reflect it back into the guest */
     svm_inject_exception(v, TRAP_gp_fault, 1, error_code);
@@ -1005,7 +976,7 @@ static void svm_do_general_protection_fa
 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
 
 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input, 
-        struct cpu_user_regs *regs) 
+                                struct cpu_user_regs *regs) 
 {
     unsigned int eax, ebx, ecx, edx;
     unsigned long eip;
@@ -1017,37 +988,30 @@ static void svm_vmexit_do_cpuid(struct v
     eip = vmcb->rip;
 
     HVM_DBG_LOG(DBG_LEVEL_1, 
-            "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
-            " (esi) %lx, (edi) %lx",
-            (unsigned long)regs->eax, (unsigned long)regs->ebx,
-            (unsigned long)regs->ecx, (unsigned long)regs->edx,
-            (unsigned long)regs->esi, (unsigned long)regs->edi);
+                "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
+                " (esi) %lx, (edi) %lx",
+                (unsigned long)regs->eax, (unsigned long)regs->ebx,
+                (unsigned long)regs->ecx, (unsigned long)regs->edx,
+                (unsigned long)regs->esi, (unsigned long)regs->edi);
 
     cpuid(input, &eax, &ebx, &ecx, &edx);
 
     if (input == 0x00000001)
     {
         if ( !hvm_apic_support(v->domain) ||
-                !vlapic_global_enabled((VLAPIC(v))) )
+             !vlapic_global_enabled((VLAPIC(v))) )
         {
             /* Since the apic is disabled, avoid any confusion 
                about SMP cpus being available */
             clear_bit(X86_FEATURE_APIC, &edx);
         }
 
-#if CONFIG_PAGING_LEVELS < 3
-        clear_bit(X86_FEATURE_PAE, &edx);
-        clear_bit(X86_FEATURE_PSE, &edx);
+#if CONFIG_PAGING_LEVELS >= 3
+        if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
+#endif
+            clear_bit(X86_FEATURE_PAE, &edx);
         clear_bit(X86_FEATURE_PSE36, &edx);
-#else
-        if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
-        {
-            if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
-                clear_bit(X86_FEATURE_PAE, &edx);
-            clear_bit(X86_FEATURE_PSE, &edx);
-            clear_bit(X86_FEATURE_PSE36, &edx);
-        }
-#endif
+
         /* Clear out reserved bits. */
         ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
         edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
@@ -1097,23 +1061,12 @@ static void svm_vmexit_do_cpuid(struct v
         clear_bit(X86_FEATURE_SYSCALL & 31, &edx);
 #endif
 
-#if CONFIG_PAGING_LEVELS < 3
-        clear_bit(X86_FEATURE_NX & 31, &edx);
-        clear_bit(X86_FEATURE_PAE, &edx);
-        clear_bit(X86_FEATURE_PSE, &edx);
+
+#if CONFIG_PAGING_LEVELS >= 3
+        if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
+#endif
+            clear_bit(X86_FEATURE_PAE, &edx);
         clear_bit(X86_FEATURE_PSE36, &edx);
-#else
-        if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
-        {
-            if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
-            {
-                clear_bit(X86_FEATURE_NX & 31, &edx);
-                clear_bit(X86_FEATURE_PAE, &edx);
-            }
-            clear_bit(X86_FEATURE_PSE, &edx);
-            clear_bit(X86_FEATURE_PSE36, &edx);
-        }
-#endif
 
         /* Make SVM feature invisible to the guest. */
         clear_bit(X86_FEATURE_SVME & 31, &ecx);
@@ -1138,9 +1091,9 @@ static void svm_vmexit_do_cpuid(struct v
     regs->edx = (unsigned long)edx;
 
     HVM_DBG_LOG(DBG_LEVEL_1, 
-            "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
-            "ebx=%x, ecx=%x, edx=%x",
-            eip, input, eax, ebx, ecx, edx);
+                "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
+                "ebx=%x, ecx=%x, edx=%x",
+                eip, input, eax, ebx, ecx, edx);
 
     inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
     ASSERT(inst_len > 0);
@@ -1149,7 +1102,7 @@ static void svm_vmexit_do_cpuid(struct v
 
 
 static inline unsigned long *get_reg_p(unsigned int gpreg, 
-        struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
+                                       struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
 {
     unsigned long *reg_p = NULL;
     switch (gpreg)
@@ -1213,7 +1166,7 @@ static inline unsigned long *get_reg_p(u
 
 
 static inline unsigned long get_reg(unsigned int gpreg, 
-        struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
+                                    struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
 {
     unsigned long *gp;
     gp = get_reg_p(gpreg, regs, vmcb);
@@ -1222,7 +1175,7 @@ static inline unsigned long get_reg(unsi
 
 
 static inline void set_reg(unsigned int gpreg, unsigned long value, 
-        struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
+                           struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
 {
     unsigned long *gp;
     gp = get_reg_p(gpreg, regs, vmcb);
@@ -1231,7 +1184,7 @@ static inline void set_reg(unsigned int 
                            
 
 static void svm_dr_access (struct vcpu *v, unsigned int reg, unsigned int type,
-        struct cpu_user_regs *regs)
+                           struct cpu_user_regs *regs)
 {
     unsigned long *reg_p = 0;
     unsigned int gpreg = 0;
@@ -1259,7 +1212,7 @@ static void svm_dr_access (struct vcpu *
     ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2]));
 
     HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
-            eip, reg, gpreg);
+                eip, reg, gpreg);
 
     reg_p = get_reg_p(gpreg, regs, vmcb);
         
@@ -1267,7 +1220,7 @@ static void svm_dr_access (struct vcpu *
     {
     case TYPE_MOV_TO_DR: 
         inst_len = __get_instruction_length(vmcb, INSTR_MOV2DR, buffer);
-        v->arch.guest_context.debugreg[reg] = *reg_p; 
+        v->arch.guest_context.debugreg[reg] = *reg_p;
         break;
     case TYPE_MOV_FROM_DR:
         inst_len = __get_instruction_length(vmcb, INSTR_MOVDR2, buffer);
@@ -1291,7 +1244,7 @@ static void svm_get_prefix_info(
 
     memset(inst, 0, MAX_INST_LEN);
     if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst)) 
-            != MAX_INST_LEN) 
+        != MAX_INST_LEN) 
     {
         printk("%s: get guest instruction failed\n", __func__);
         domain_crash_synchronous();
@@ -1555,6 +1508,7 @@ static int svm_set_cr0(unsigned long val
     unsigned long mfn;
     int paging_enabled;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    unsigned long old_base_mfn;
   
     ASSERT(vmcb);
 
@@ -1577,8 +1531,8 @@ static int svm_set_cr0(unsigned long val
     {
         /* The guest CR3 must be pointing to the guest physical. */
         if (!VALID_MFN(mfn = 
-                    get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
-                || !get_page(mfn_to_page(mfn), v->domain))
+                       get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
+            || !get_page(mfn_to_page(mfn), v->domain))
         {
             printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
             domain_crash_synchronous(); /* need to take a clean path */
@@ -1586,8 +1540,8 @@ static int svm_set_cr0(unsigned long val
 
 #if defined(__x86_64__)
         if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state) 
-                && !test_bit(SVM_CPU_STATE_PAE_ENABLED, 
-                    &v->arch.hvm_svm.cpu_state))
+            && !test_bit(SVM_CPU_STATE_PAE_ENABLED, 
+                         &v->arch.hvm_svm.cpu_state))
         {
             HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
             svm_inject_exception(v, TRAP_gp_fault, 1, 0);
@@ -1600,60 +1554,27 @@ static int svm_set_cr0(unsigned long val
             set_bit(SVM_CPU_STATE_LMA_ENABLED,
                     &v->arch.hvm_svm.cpu_state);
             vmcb->efer |= (EFER_LMA | EFER_LME);
-            if (!shadow_set_guest_paging_levels(v->domain, PAGING_L4) )
-            {
-                printk("Unsupported guest paging levels\n");
-                domain_crash_synchronous(); /* need to take a clean path */
-            }
-        }
-        else
+        }
 #endif  /* __x86_64__ */
-        {
-#if CONFIG_PAGING_LEVELS >= 3
-            /* seems it's a 32-bit or 32-bit PAE guest */
-            if ( test_bit(SVM_CPU_STATE_PAE_ENABLED,
-                        &v->arch.hvm_svm.cpu_state) )
-            {
-                /* The guest enables PAE first and then it enables PG, it is
-                 * really a PAE guest */
-                if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
-                {
-                    printk("Unsupported guest paging levels\n");
-                    domain_crash_synchronous();
-                }
-            }
-            else
-            {
-                if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) )
-                {
-                    printk("Unsupported guest paging levels\n");
-                    domain_crash_synchronous(); /* need to take a clean path */
-                }
-            }
-#endif
-        }
 
         /* Now arch.guest_table points to machine physical. */
+        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
         v->arch.guest_table = pagetable_from_pfn(mfn);
-        update_pagetables(v);
+        if ( old_base_mfn )
+            put_page(mfn_to_page(old_base_mfn));
+        shadow2_update_paging_modes(v);
 
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
-                (unsigned long) (mfn << PAGE_SHIFT));
-
+                    (unsigned long) (mfn << PAGE_SHIFT));
+
+        vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
-        vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
-
-        /* arch->shadow_table should hold the next CR3 for shadow */
-        HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n", 
-                    v->arch.hvm_svm.cpu_cr3, mfn);
-
-        return 1;
     }
 
     if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
         if ( v->arch.hvm_svm.cpu_cr3 ) {
             put_page(mfn_to_page(get_mfn_from_gpfn(
-                      v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
+                v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
             v->arch.guest_table = pagetable_null();
         }
 
@@ -1667,17 +1588,16 @@ static int svm_set_cr0(unsigned long val
             svm_inject_exception(v, TRAP_gp_fault, 1, 0);
             return 0;
         }
-
-        clear_all_shadow_status( v->domain );
+        shadow2_update_paging_modes(v);
+        vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
-        vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
     }
     else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
     {
         /* we should take care of this kind of situation */
-        clear_all_shadow_status(v->domain);
+        shadow2_update_paging_modes(v);
+        vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
-        vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
     }
 
     return 1;
@@ -1701,7 +1621,7 @@ static void mov_from_cr(int cr, int gp, 
         value = v->arch.hvm_svm.cpu_shadow_cr0;
         if (svm_dbg_on)
             printk("CR0 read =%lx \n", value );
-          break;
+        break;
     case 2:
         value = vmcb->cr2;
         break;
@@ -1709,11 +1629,11 @@ static void mov_from_cr(int cr, int gp, 
         value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
         if (svm_dbg_on)
             printk("CR3 read =%lx \n", value );
-          break;
+        break;
     case 4:
         value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
         if (svm_dbg_on)
-           printk( "CR4 read=%lx\n", value );
+            printk( "CR4 read=%lx\n", value );
         break;
     case 8:
 #if 0
@@ -1735,7 +1655,7 @@ static void mov_from_cr(int cr, int gp, 
 
 static inline int svm_pgbit_test(struct vcpu *v)
 {
-   return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
+    return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
 }
 
 
@@ -1786,7 +1706,7 @@ static int mov_to_cr(int gpreg, int cr, 
             mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
             if (mfn != pagetable_get_pfn(v->arch.guest_table))
                 __hvm_bug(regs);
-            shadow_sync_all(v->domain);
+            shadow2_update_cr3(v);
         }
         else 
         {
@@ -1796,8 +1716,8 @@ static int mov_to_cr(int gpreg, int cr, 
              */
             HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
             if (((value >> PAGE_SHIFT) > v->domain->max_pages) 
-                    || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT))
-                    || !get_page(mfn_to_page(mfn), v->domain))
+                || !VALID_MFN(mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT))
+                || !get_page(mfn_to_page(mfn), v->domain))
             {
                 printk("Invalid CR3 value=%lx\n", value);
                 domain_crash_synchronous(); /* need to take a clean path */
@@ -1812,14 +1732,10 @@ static int mov_to_cr(int gpreg, int cr, 
             /*
              * arch.shadow_table should now hold the next CR3 for shadow
              */
-#if CONFIG_PAGING_LEVELS >= 3
-            if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
-                shadow_sync_all(v->domain);
-#endif
             v->arch.hvm_svm.cpu_cr3 = value;
-            update_pagetables(v);
+            update_cr3(v);
+            vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
             HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
-            vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
         }
         break;
     }
@@ -1828,7 +1744,7 @@ static int mov_to_cr(int gpreg, int cr, 
     {
         if (svm_dbg_on)
             printk( "write cr4=%lx, cr0=%lx\n", 
-                     value,  v->arch.hvm_svm.cpu_shadow_cr0 );
+                    value,  v->arch.hvm_svm.cpu_shadow_cr0 );
         old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
         if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
         {
@@ -1839,35 +1755,28 @@ static int mov_to_cr(int gpreg, int cr, 
 #if CONFIG_PAGING_LEVELS >= 3
                 unsigned long mfn, old_base_mfn;
 
-                if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
-                {
-                    printk("Unsupported guest paging levels\n");
-                    domain_crash_synchronous(); /* need to take a clean path */
-                }
-
                 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
-                                    v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
+                    v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)) ||
                      !get_page(mfn_to_page(mfn), v->domain) )
                 {
                     printk("Invalid CR3 value = %lx", v->arch.hvm_svm.cpu_cr3);
                     domain_crash_synchronous(); /* need to take a clean path */
                 }
 
-                old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-                if ( old_base_mfn )
-                    put_page(mfn_to_page(old_base_mfn));
-
                 /*
                  * Now arch.guest_table points to machine physical.
                  */
 
+                old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
                 v->arch.guest_table = pagetable_from_pfn(mfn);
-                update_pagetables(v);
+                if ( old_base_mfn )
+                    put_page(mfn_to_page(old_base_mfn));
+                shadow2_update_paging_modes(v);
 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
                             (unsigned long) (mfn << PAGE_SHIFT));
 
-                vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
+                vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
 
                 /*
                  * arch->shadow_table should hold the next CR3 for shadow
@@ -1876,33 +1785,6 @@ static int mov_to_cr(int gpreg, int cr, 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, 
                             "Update CR3 value = %lx, mfn = %lx",
                             v->arch.hvm_svm.cpu_cr3, mfn);
-#endif
-            }
-            else
-            {
-                /*  The guest is a 64 bit or 32-bit PAE guest. */
-#if CONFIG_PAGING_LEVELS >= 3
-                if ( (v->domain->arch.ops != NULL) &&
-                        v->domain->arch.ops->guest_paging_levels == PAGING_L2)
-                {
-                    /* Seems the guest first enables PAE without enabling PG,
-                     * it must enable PG after that, and it is a 32-bit PAE
-                     * guest */
-
-                    if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3))
-                    {
-                        printk("Unsupported guest paging levels\n");
-                        domain_crash_synchronous();
-                    }                   
-                }
-                else
-                {
-                    if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L4))
-                    {
-                        printk("Unsupported guest paging levels\n");
-                        domain_crash_synchronous();
-                    }
-                }
 #endif
             }
         }
@@ -1926,7 +1808,7 @@ static int mov_to_cr(int gpreg, int cr, 
         if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
         {
             set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
-            shadow_sync_all(v->domain);
+            shadow2_update_paging_modes(v);
         }
         break;
     }
@@ -1944,7 +1826,7 @@ static int mov_to_cr(int gpreg, int cr, 
 
 
 static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
-        struct cpu_user_regs *regs)
+                         struct cpu_user_regs *regs)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     int inst_len = 0;
@@ -1968,13 +1850,13 @@ static int svm_cr_access(struct vcpu *v,
     
     if (type == TYPE_MOV_TO_CR) 
     {
-        inst_len = __get_instruction_length_from_list(vmcb, list_a, 
-                ARR_SIZE(list_a), &buffer[index], &match);
+        inst_len = __get_instruction_length_from_list(
+            vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match);
     }
     else
     {
-        inst_len = __get_instruction_length_from_list(vmcb, list_b, 
-                ARR_SIZE(list_b), &buffer[index], &match);
+        inst_len = __get_instruction_length_from_list(
+            vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match);
     }
 
     ASSERT(inst_len > 0);
@@ -2016,7 +1898,7 @@ static int svm_cr_access(struct vcpu *v,
 
         if (svm_dbg_on)
             printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg, 
-                    inst_len);
+                   inst_len);
 
         value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
 
@@ -2035,7 +1917,7 @@ static int svm_cr_access(struct vcpu *v,
 
         if (svm_dbg_on)
             printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg, 
-                    inst_len);
+                   inst_len);
         break;
 
     default:
@@ -2061,9 +1943,9 @@ static inline void svm_do_msr_access(
     ASSERT(vmcb);
 
     HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
-            "exitinfo = %lx", (unsigned long)regs->ecx, 
-            (unsigned long)regs->eax, (unsigned long)regs->edx, 
-            (unsigned long)vmcb->exitinfo1);
+                "exitinfo = %lx", (unsigned long)regs->ecx, 
+                (unsigned long)regs->eax, (unsigned long)regs->edx, 
+                (unsigned long)vmcb->exitinfo1);
 
     /* is it a read? */
     if (vmcb->exitinfo1 == 0)
@@ -2133,7 +2015,7 @@ static inline void svm_do_msr_access(
         }
     }
 
-done:
+ done:
 
     HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
                 "ecx=%lx, eax=%lx, edx=%lx",
@@ -2144,29 +2026,16 @@ done:
 }
 
 
-/*
- * Need to use this exit to reschedule
- */
 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
 {
-    struct vcpu *v = current;
-    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
-    s_time_t  next_pit = -1, next_wakeup;
-
     __update_guest_eip(vmcb, 1);
 
-    /* check for interrupt not handled or new interrupt */
-    if ( vmcb->vintr.fields.irq || cpu_has_pending_irq(v) )
-       return;
-
-    if ( !v->vcpu_id )
-        next_pit = get_scheduled(v, pt->irq, pt);
-    next_wakeup = get_apictime_scheduled(v);
-    if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
-        next_wakeup = next_pit;
-    if ( next_wakeup != - 1 )
-        set_timer(&current->arch.hvm_svm.hlt_timer, next_wakeup);
-    do_sched_op_compat(SCHEDOP_block, 0);
+    /* Check for interrupt not handled or new interrupt. */
+    if ( (vmcb->rflags & X86_EFLAGS_IF) &&
+         (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) )
+        return;
+
+    hvm_hlt(vmcb->rflags);
 }
 
 
@@ -2193,7 +2062,7 @@ static void svm_vmexit_do_invd(struct vm
 
 #ifdef XEN_DEBUGGER
 static void svm_debug_save_cpu_user_regs(struct vmcb_struct *vmcb, 
-        struct cpu_user_regs *regs)
+                                         struct cpu_user_regs *regs)
 {
     regs->eip = vmcb->rip;
     regs->esp = vmcb->rsp;
@@ -2241,7 +2110,7 @@ void svm_handle_invlpg(const short invlp
     {
         printk("svm_handle_invlpg (): Error reading memory %d bytes\n", 
                length);
-       __hvm_bug(regs);
+        __hvm_bug(regs);
     }
 
     if (invlpga)
@@ -2272,7 +2141,7 @@ void svm_handle_invlpg(const short invlp
          * the system in either 32- or 64-bit mode.
          */
         g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix, 
-                            &opcode[inst_len], &length);
+                                             &opcode[inst_len], &length);
 
         inst_len += length;
         __update_guest_eip (vmcb, inst_len);
@@ -2280,7 +2149,7 @@ void svm_handle_invlpg(const short invlp
 
    /* Overkill, we may not need this */
     set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
-    shadow_invlpg(v, g_vaddr);
+    shadow2_invlpg(v, g_vaddr);
 }
 
 
@@ -2291,7 +2160,7 @@ void svm_handle_invlpg(const short invlp
  * returns 0 on success, non-zero otherwise
  */
 static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v, 
-        struct cpu_user_regs *regs)
+                                            struct cpu_user_regs *regs)
 {
     struct vmcb_struct *vmcb;
 
@@ -2651,10 +2520,10 @@ void walk_shadow_and_guest_pt(unsigned l
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     unsigned long gpa;
 
-    gpa = gva_to_gpa( gva );
+    gpa = shadow2_gva_to_gpa(current, gva);
     printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
     if( !svm_paging_enabled(v) || mmio_space(gpa) )
-       return;
+        return;
 
     /* let's dump the guest and shadow page info */
 
@@ -2675,8 +2544,12 @@ void walk_shadow_and_guest_pt(unsigned l
     __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ],
                      sizeof(gpte) );
     printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
-    __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ],
+
+    BUG(); // need to think about this, and convert usage of
+    // phys_to_machine_mapping to use pagetable format...
+    __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ], 
                       sizeof(spte) );
+
     printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
 }
 #endif /* SVM_WALK_GUEST_PAGES */
@@ -2708,99 +2581,105 @@ asmlinkage void svm_vmexit_handler(struc
     }
 
 #ifdef SVM_EXTRA_DEBUG
-{
+    {
 #if defined(__i386__)
-#define        rip     eip
+#define rip eip
 #endif
 
-    static unsigned long intercepts_counter = 0;
-
-    if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) 
-    {
-        if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
-        {
-            printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx, "
-                   "gpa=%llx\n", intercepts_counter,
-                    exit_reasons[exit_reason], exit_reason, regs.cs,
-                   (unsigned long long) regs.rip,
-                   (unsigned long long) vmcb->exitinfo1,
-                   (unsigned long long) vmcb->exitinfo2,
-                   (unsigned long long) vmcb->exitintinfo.bytes,
-            (unsigned long long) gva_to_gpa( vmcb->exitinfo2 ) );
-        }
-        else 
-        {
-            printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n", 
-                    intercepts_counter,
-                    exit_reasons[exit_reason], exit_reason, regs.cs,
-                   (unsigned long long) regs.rip,
-                   (unsigned long long) vmcb->exitinfo1,
-                   (unsigned long long) vmcb->exitinfo2,
-                   (unsigned long long) vmcb->exitintinfo.bytes );
-        }
-    } 
-    else if ( svm_dbg_on 
-              && exit_reason != VMEXIT_IOIO 
-              && exit_reason != VMEXIT_INTR) 
-    {
-
-        if (exit_reasons[exit_reason])
-        {
-            printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n", 
-                    intercepts_counter,
-                    exit_reasons[exit_reason], exit_reason, regs.cs,
-                   (unsigned long long) regs.rip,
-                   (unsigned long long) vmcb->exitinfo1,
-                   (unsigned long long) vmcb->exitinfo2,
-                   (unsigned long long) vmcb->exitintinfo.bytes);
+        static unsigned long intercepts_counter = 0;
+
+        if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) 
+        {
+            if (svm_paging_enabled(v) && 
+                !mmio_space(shadow2_gva_to_gpa(current, vmcb->exitinfo2)))
+            {
+                printk("I%08ld,ExC=%s(%d),IP=%x:%llx,"
+                       "I1=%llx,I2=%llx,INT=%llx, "
+                       "gpa=%llx\n", intercepts_counter,
+                       exit_reasons[exit_reason], exit_reason, regs.cs,
+                       (unsigned long long) regs.rip,
+                       (unsigned long long) vmcb->exitinfo1,
+                       (unsigned long long) vmcb->exitinfo2,
+                       (unsigned long long) vmcb->exitintinfo.bytes,
+                       (unsigned long long) shadow2_gva_to_gpa(current, vmcb->exitinfo2));
+            }
+            else 
+            {
+                printk("I%08ld,ExC=%s(%d),IP=%x:%llx,"
+                       "I1=%llx,I2=%llx,INT=%llx\n", 
+                       intercepts_counter,
+                       exit_reasons[exit_reason], exit_reason, regs.cs,
+                       (unsigned long long) regs.rip,
+                       (unsigned long long) vmcb->exitinfo1,
+                       (unsigned long long) vmcb->exitinfo2,
+                       (unsigned long long) vmcb->exitintinfo.bytes );
+            }
         } 
-        else 
-        {
-            printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n", 
-                    intercepts_counter, exit_reason, exit_reason, regs.cs, 
-                   (unsigned long long) regs.rip,
-                   (unsigned long long) vmcb->exitinfo1,
-                   (unsigned long long) vmcb->exitinfo2,
-                   (unsigned long long) vmcb->exitintinfo.bytes);
-        }
-    }
+        else if ( svm_dbg_on 
+                  && exit_reason != VMEXIT_IOIO 
+                  && exit_reason != VMEXIT_INTR) 
+        {
+
+            if (exit_reasons[exit_reason])
+            {
+                printk("I%08ld,ExC=%s(%d),IP=%x:%llx,"
+                       "I1=%llx,I2=%llx,INT=%llx\n", 
+                       intercepts_counter,
+                       exit_reasons[exit_reason], exit_reason, regs.cs,
+                       (unsigned long long) regs.rip,
+                       (unsigned long long) vmcb->exitinfo1,
+                       (unsigned long long) vmcb->exitinfo2,
+                       (unsigned long long) vmcb->exitintinfo.bytes);
+            } 
+            else 
+            {
+                printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx,"
+                       "I1=%llx,I2=%llx,INT=%llx\n", 
+                       intercepts_counter, exit_reason, exit_reason, regs.cs, 
+                       (unsigned long long) regs.rip,
+                       (unsigned long long) vmcb->exitinfo1,
+                       (unsigned long long) vmcb->exitinfo2,
+                       (unsigned long long) vmcb->exitintinfo.bytes);
+            }
+        }
 
 #ifdef SVM_WALK_GUEST_PAGES
-    if( exit_reason == VMEXIT_EXCEPTION_PF 
-        && ( ( vmcb->exitinfo2 == vmcb->rip )
-        || vmcb->exitintinfo.bytes) )
-    {
-       if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
-           walk_shadow_and_guest_pt( vmcb->exitinfo2 );
-    }
+        if( exit_reason == VMEXIT_EXCEPTION_PF 
+            && ( ( vmcb->exitinfo2 == vmcb->rip )
+                 || vmcb->exitintinfo.bytes) )
+        {
+            if ( svm_paging_enabled(v) &&
+                 !mmio_space(gva_to_gpa(vmcb->exitinfo2)) )
+                walk_shadow_and_guest_pt(vmcb->exitinfo2);
+        }
 #endif
 
-    intercepts_counter++;
+        intercepts_counter++;
 
 #if 0
-    if (svm_dbg_on)
-        do_debug = svm_do_debugout(exit_reason);
+        if (svm_dbg_on)
+            do_debug = svm_do_debugout(exit_reason);
 #endif
 
-    if (do_debug)
-    {
-        printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
-                "shadow_table = 0x%08x\n", 
-                __func__,
-               (int) v->arch.guest_table.pfn,
-               (int) v->arch.monitor_table.pfn, 
-                (int) v->arch.shadow_table.pfn);
-
-        svm_dump_vmcb(__func__, vmcb);
-        svm_dump_regs(__func__, &regs);
-        svm_dump_inst(svm_rip2pointer(vmcb));
-    }
+        if (do_debug)
+        {
+            printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
+                   "shadow_table = 0x%08x\n", 
+                   __func__,
+                   (int) v->arch.guest_table.pfn,
+                   (int) v->arch.monitor_table.pfn, 
+                   (int) v->arch.shadow_table.pfn);
+
+            svm_dump_vmcb(__func__, vmcb);
+            svm_dump_regs(__func__, &regs);
+            svm_dump_inst(svm_rip2pointer(vmcb));
+        }
 
 #if defined(__i386__)
-#undef rip
+#undef rip
 #endif
 
-}
+    }
 #endif /* SVM_EXTRA_DEBUG */
 
 
@@ -2811,7 +2690,7 @@ asmlinkage void svm_vmexit_handler(struc
     if (do_debug)
     {
         printk("eip = %lx, exit_reason = %d (0x%x)\n", 
-                eip, exit_reason, exit_reason);
+               eip, exit_reason, exit_reason);
     }
 #endif /* SVM_EXTRA_DEBUG */
 
@@ -2880,10 +2759,10 @@ asmlinkage void svm_vmexit_handler(struc
         va = vmcb->exitinfo2;
         regs.error_code = vmcb->exitinfo1;
         HVM_DBG_LOG(DBG_LEVEL_VMMU, 
-                "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
-                (unsigned long)regs.eax, (unsigned long)regs.ebx,
-                (unsigned long)regs.ecx, (unsigned long)regs.edx,
-                (unsigned long)regs.esi, (unsigned long)regs.edi);
+                    "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
+                    (unsigned long)regs.eax, (unsigned long)regs.ebx,
+                    (unsigned long)regs.ecx, (unsigned long)regs.edx,
+                    (unsigned long)regs.esi, (unsigned long)regs.edi);
 
         if (!(error = svm_do_page_fault(va, &regs))) 
         {
@@ -2893,7 +2772,7 @@ asmlinkage void svm_vmexit_handler(struc
             v->arch.hvm_svm.cpu_cr2 = va;
             vmcb->cr2 = va;
             TRACE_3D(TRC_VMX_INT, v->domain->domain_id, 
-                    VMEXIT_EXCEPTION_PF, va);
+                     VMEXIT_EXCEPTION_PF, va);
         }
         break;
     }
@@ -3048,8 +2927,8 @@ asmlinkage void svm_vmexit_handler(struc
     default:
         printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %llx, "
                "exitinfo2 = %llx\n", exit_reason, 
-                                    (unsigned long long)vmcb->exitinfo1, 
-                                    (unsigned long long)vmcb->exitinfo2);
+               (unsigned long long)vmcb->exitinfo1, 
+               (unsigned long long)vmcb->exitinfo2);
         __hvm_bug(&regs);       /* should not happen */
         break;
     }
@@ -3064,10 +2943,10 @@ asmlinkage void svm_vmexit_handler(struc
     if (do_debug) 
     {
         printk("vmexit_handler():- guest_table = 0x%08x, "
-                "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
-                (int)v->arch.guest_table.pfn,
-               (int)v->arch.monitor_table.pfn, 
-                (int)v->arch.shadow_table.pfn);
+               "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
+               (int)v->arch.guest_table.pfn,
+               (int)v->arch.monitor_table.pfn, 
+               (int)v->arch.shadow_table.pfn);
         printk("svm_vmexit_handler: Returning\n");
     }
 #endif
@@ -3088,15 +2967,17 @@ asmlinkage void svm_asid(void)
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
-   /*
-    * if need to assign new asid, or if switching cores,
-    * retire asid for the old core, and assign a new asid to the current core.
-    */
+    /*
+     * if need to assign new asid, or if switching cores,
+     * retire asid for the old core, and assign a new asid to the current core.
+     */
     if ( test_bit( ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags ) ||
-       ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
+         ( v->arch.hvm_svm.asid_core != v->arch.hvm_svm.launch_core )) {
         /* recycle asid */
-        if ( !asidpool_assign_next( vmcb, 1,
-            v->arch.hvm_svm.asid_core, v->arch.hvm_svm.launch_core )) {
+        if ( !asidpool_assign_next(vmcb, 1,
+                                   v->arch.hvm_svm.asid_core,
+                                   v->arch.hvm_svm.launch_core) )
+        {
             /* If we get here, we have a major problem */
             domain_crash_synchronous();
         }
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Sun Aug 20 11:08:45 2006 -0400
@@ -309,10 +309,6 @@ int construct_vmcb(struct arch_svm_struc
         goto err_out;
     }
 
-    /* update the HSA for the current Core */
-#if 0
-    set_hsa_to_guest( arch_svm );
-#endif
     arch_svm->vmcb_pa  = (u64) virt_to_maddr(arch_svm->vmcb);
 
     if ((error = construct_vmcb_controls(arch_svm))) 
@@ -364,8 +360,7 @@ void svm_do_launch(struct vcpu *v)
 
     if (hvm_apic_support(v->domain))
         vlapic_init(v);
-    init_timer(&v->arch.hvm_svm.hlt_timer,
-                               hlt_timer_fn, v, v->processor);
+    init_timer(&v->arch.hvm_vcpu.hlt_timer, hlt_timer_fn, v, v->processor);
 
     vmcb->ldtr.sel = 0;
     vmcb->ldtr.base = 0;
@@ -385,8 +380,8 @@ void svm_do_launch(struct vcpu *v)
         printk("%s: phys_table   = %lx\n", __func__, pt);
     }
 
-    /* At launch we always use the phys_table */
-    vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
+    /* Set cr3 from hw_cr3 even when guest-visible paging is not enabled */
+    vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
 
     if (svm_dbg_on) 
     {
@@ -401,7 +396,7 @@ void svm_do_launch(struct vcpu *v)
     v->arch.hvm_svm.saved_irq_vector = -1;
 
     hvm_set_guest_time(v, 0);
-       
+
     if (svm_dbg_on)
         svm_dump_vmcb(__func__, vmcb);
 
@@ -432,24 +427,24 @@ void svm_dump_vmcb(const char *from, str
            vmcb->general1_intercepts, vmcb->general2_intercepts);
     printf("iopm_base_pa = %016llx msrpm_base_pa = 0x%016llx tsc_offset = "
             "0x%016llx\n", 
-           (unsigned long long) vmcb->iopm_base_pa,
-           (unsigned long long) vmcb->msrpm_base_pa,
-           (unsigned long long) vmcb->tsc_offset);
+           (unsigned long long) vmcb->iopm_base_pa,
+           (unsigned long long) vmcb->msrpm_base_pa,
+           (unsigned long long) vmcb->tsc_offset);
     printf("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
             "0x%016llx\n", vmcb->tlb_control,
-           (unsigned long long) vmcb->vintr.bytes,
-           (unsigned long long) vmcb->interrupt_shadow);
+           (unsigned long long) vmcb->vintr.bytes,
+           (unsigned long long) vmcb->interrupt_shadow);
     printf("exitcode = 0x%016llx exitintinfo = 0x%016llx\n", 
            (unsigned long long) vmcb->exitcode,
-          (unsigned long long) vmcb->exitintinfo.bytes);
+           (unsigned long long) vmcb->exitintinfo.bytes);
     printf("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx \n",
            (unsigned long long) vmcb->exitinfo1,
-          (unsigned long long) vmcb->exitinfo2);
+           (unsigned long long) vmcb->exitinfo2);
     printf("np_enable = 0x%016llx guest_asid = 0x%03x\n", 
            (unsigned long long) vmcb->np_enable, vmcb->guest_asid);
     printf("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n", 
            vmcb->cpl, (unsigned long long) vmcb->efer,
-          (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
+           (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
     printf("CR0 = 0x%016llx CR2 = 0x%016llx\n",
            (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2);
     printf("CR3 = 0x%016llx CR4 = 0x%016llx\n", 
@@ -465,7 +460,7 @@ void svm_dump_vmcb(const char *from, str
            (unsigned long long) vmcb->sfmask);
     printf("KernGSBase = 0x%016llx PAT = 0x%016llx \n", 
            (unsigned long long) vmcb->kerngsbase,
-          (unsigned long long) vmcb->g_pat);
+           (unsigned long long) vmcb->g_pat);
     
     /* print out all the selectors */
     svm_dump_sel("CS", &vmcb->cs);
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/svm/x86_32/exits.S
--- a/xen/arch/x86/hvm/svm/x86_32/exits.S       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/svm/x86_32/exits.S       Sun Aug 20 11:08:45 2006 -0400
@@ -56,8 +56,8 @@
  * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
  */
 
-#define HVM_MONITOR_EFLAGS     0x202 /* IF on */
-#define NR_SKIPPED_REGS        6       /* See the above explanation */
+#define HVM_MONITOR_EFLAGS 0x202 /* IF on */
+#define NR_SKIPPED_REGS    6     /* See the above explanation */
 #define HVM_SAVE_ALL_NOSEGREGS \
         pushl $HVM_MONITOR_EFLAGS; \
         popf; \
@@ -95,8 +95,8 @@ ENTRY(svm_asm_do_launch)
         movl VCPU_svm_vmcb(%ebx), %ecx
         movl 24(%esp), %eax
         movl %eax, VMCB_rax(%ecx)
-       movl VCPU_processor(%ebx), %eax
-       movl root_vmcb_pa(,%eax,8), %eax
+        movl VCPU_processor(%ebx), %eax
+        movl root_vmcb_pa(,%eax,8), %eax
         VMSAVE
 
         movl VCPU_svm_vmcb_pa(%ebx), %eax
@@ -120,8 +120,8 @@ ENTRY(svm_asm_do_launch)
 
         GET_CURRENT(%eax)
 
-       movl VCPU_processor(%eax), %eax
-       movl root_vmcb_pa(,%eax,8), %eax
+        movl VCPU_processor(%eax), %eax
+        movl root_vmcb_pa(,%eax,8), %eax
         VMLOAD
 
         HVM_SAVE_ALL_NOSEGREGS
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/svm/x86_64/exits.S
--- a/xen/arch/x86/hvm/svm/x86_64/exits.S       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/svm/x86_64/exits.S       Sun Aug 20 11:08:45 2006 -0400
@@ -52,8 +52,8 @@
  * (2/1)  u32 entry_vector;
  * (1/1)  u32 error_code;
  */
-#define HVM_MONITOR_RFLAGS     0x202 /* IF on */
-#define NR_SKIPPED_REGS        6       /* See the above explanation */
+#define HVM_MONITOR_RFLAGS 0x202 /* IF on */
+#define NR_SKIPPED_REGS    6     /* See the above explanation */
 #define HVM_SAVE_ALL_NOSEGREGS \
         pushq $HVM_MONITOR_RFLAGS; \
         popfq; \
@@ -105,10 +105,9 @@ ENTRY(svm_asm_do_launch)
         movq VCPU_svm_vmcb(%rbx), %rcx
         movq UREGS_rax(%rsp), %rax
         movq %rax, VMCB_rax(%rcx)
-       leaq root_vmcb_pa(%rip), %rax
-       movl VCPU_processor(%rbx), %ecx
-       shll $3, %ecx
-       addq %rcx, %rax
+        leaq root_vmcb_pa(%rip), %rax
+        movl VCPU_processor(%rbx), %ecx
+        movq (%rax,%rcx,8), %rax
         VMSAVE
 
         movq VCPU_svm_vmcb_pa(%rbx), %rax
@@ -139,10 +138,9 @@ ENTRY(svm_asm_do_launch)
         HVM_SAVE_ALL_NOSEGREGS
 
         GET_CURRENT(%rbx)
-       movl VCPU_processor(%rbx), %ecx
-       leaq root_vmcb_pa(%rip), %rax
-       shll $3, %ecx
-       addq %rcx, %rax
+        leaq root_vmcb_pa(%rip), %rax
+        movl VCPU_processor(%rbx), %ecx
+        movq (%rax,%rcx,8), %rax
         VMLOAD
 
         STGI
@@ -151,13 +149,13 @@ ENTRY(svm_asm_do_launch)
 
 ENTRY(svm_asm_do_resume)
 svm_test_all_events:
-       GET_CURRENT(%rbx)
+        GET_CURRENT(%rbx)
         movq %rbx, %rdi
         call hvm_do_resume
 /*test_all_events:*/
         cli                             # tests must not race interrupts
 /*test_softirqs:*/
-       movl  VCPU_processor(%rbx),%eax
+        movl  VCPU_processor(%rbx),%eax
         shl   $IRQSTAT_shift, %rax
         leaq  irq_stat(%rip), %rdx
         testl $~0, (%rdx, %rax, 1)
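
Editorial note on the x86_64 exits.S hunks above: the old sequence (leaq root_vmcb_pa(%rip); shll $3; addq) left the address of root_vmcb_pa[cpu] in %rax, while the replacement movq (%rax,%rcx,8), %rax loads the value stored in that slot, which matches what the i386 version (movl root_vmcb_pa(,%eax,8), %eax) already did. If root_vmcb_pa[] holds per-CPU physical addresses, as the name suggests, the new form appears to be the intended one. A trivial C illustration of the difference; only root_vmcb_pa is taken from the patch, the rest is a made-up stand-in:

    #include <stdint.h>

    #define NR_CPUS 8                      /* illustrative only */
    uint64_t root_vmcb_pa[NR_CPUS];        /* per-CPU host save area, physical address */

    uint64_t old_sequence(int cpu)         /* leaq/shll/addq: address of the slot */
    {
        return (uint64_t)(uintptr_t)&root_vmcb_pa[cpu];
    }

    uint64_t new_sequence(int cpu)         /* movq (%rax,%rcx,8),%rax: value in the slot */
    {
        return root_vmcb_pa[cpu];
    }
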
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vioapic.c
--- a/xen/arch/x86/hvm/vioapic.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/vioapic.c        Sun Aug 20 11:08:45 2006 -0400
@@ -44,7 +44,7 @@
 #define IRQ0_SPECIAL_ROUTING 1
 
 #if defined(__ia64__)
-#define        opt_hvm_debug_level     opt_vmx_debug_level
+#define opt_hvm_debug_level opt_vmx_debug_level
 #endif
 
 static void ioapic_enable(hvm_vioapic_t *s, uint8_t enable)
@@ -264,7 +264,7 @@ static void hvm_vioapic_reset(hvm_vioapi
 
     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
         s->redirtbl[i].RedirForm.mask = 0x1;
-       hvm_vioapic_update_imr(s, i);
+        hvm_vioapic_update_imr(s, i);
     }
 }
 
@@ -364,7 +364,7 @@ static uint32_t ioapic_get_delivery_bitm
 
     if (dest_mode == 0) { /* Physical mode */
         for (i = 0; i < s->lapic_count; i++) {
-           if (VLAPIC_ID(s->lapic_info[i]) == dest) {
+            if (VLAPIC_ID(s->lapic_info[i]) == dest) {
                 mask = 1 << i;
                 break;
             }
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/vlapic.c Sun Aug 20 11:08:45 2006 -0400
@@ -21,7 +21,8 @@
 #include <xen/types.h>
 #include <xen/mm.h>
 #include <xen/xmalloc.h>
-#include <asm/shadow.h>
+#include <xen/shadow.h>
+#include <xen/domain_page.h>
 #include <asm/page.h>
 #include <xen/event.h>
 #include <xen/trace.h>
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Sun Aug 20 11:08:45 2006 -0400
@@ -34,12 +34,8 @@
 #include <asm/flushtlb.h>
 #include <xen/event.h>
 #include <xen/kernel.h>
-#include <asm/shadow.h>
 #include <xen/keyhandler.h>
-
-#if CONFIG_PAGING_LEVELS >= 3
-#include <asm/shadow_64.h>
-#endif
+#include <asm/shadow2.h>
 
 static int vmcs_size;
 static int vmcs_order;
@@ -238,7 +234,7 @@ static void vmx_set_host_env(struct vcpu
 
 static void vmx_do_launch(struct vcpu *v)
 {
-/* Update CR3, GDT, LDT, TR */
+/* Update CR3, CR0, CR4, GDT, LDT, TR */
     unsigned int  error = 0;
     unsigned long cr0, cr4;
 
@@ -267,7 +263,7 @@ static void vmx_do_launch(struct vcpu *v
         vlapic_init(v);
 
     vmx_set_host_env(v);
-    init_timer(&v->arch.hvm_vmx.hlt_timer, hlt_timer_fn, v, v->processor);
+    init_timer(&v->arch.hvm_vcpu.hlt_timer, hlt_timer_fn, v, v->processor);
 
     error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
     error |= __vmwrite(GUEST_LDTR_BASE, 0);
@@ -276,8 +272,11 @@ static void vmx_do_launch(struct vcpu *v
     error |= __vmwrite(GUEST_TR_BASE, 0);
     error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
 
-    __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
-    __vmwrite(HOST_CR3, pagetable_get_paddr(v->arch.monitor_table));
+    shadow2_update_paging_modes(v);
+    printk("%s(): GUEST_CR3<=%08lx, HOST_CR3<=%08lx\n",
+           __func__, v->arch.hvm_vcpu.hw_cr3, v->arch.cr3);
+    __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
+    __vmwrite(HOST_CR3, v->arch.cr3);
 
     v->arch.schedule_tail = arch_vmx_do_resume;
 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Sun Aug 20 11:08:45 2006 -0400
@@ -26,9 +26,9 @@
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
 #include <xen/hypercall.h>
+#include <xen/perfc.h>
 #include <asm/current.h>
 #include <asm/io.h>
-#include <asm/shadow.h>
 #include <asm/regs.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
@@ -40,10 +40,7 @@
 #include <asm/hvm/vmx/vmx.h>
 #include <asm/hvm/vmx/vmcs.h>
 #include <asm/hvm/vmx/cpu.h>
-#include <asm/shadow.h>
-#if CONFIG_PAGING_LEVELS >= 3
-#include <asm/shadow_64.h>
-#endif
+#include <asm/shadow2.h>
 #include <public/sched.h>
 #include <public/hvm/ioreq.h>
 #include <asm/hvm/vpic.h>
@@ -69,11 +66,16 @@ static int vmx_initialize_guest_resource
     if ( v->vcpu_id != 0 )
         return 1;
 
+    if ( !shadow2_mode_external(d) )
+    {
+        DPRINTK("Can't init HVM for dom %u vcpu %u: "
+                "not in shadow2 external mode\n", 
+                d->domain_id, v->vcpu_id);
+        domain_crash(d);
+    }
+
     for_each_vcpu ( d, vc )
     {
-        /* Initialize monitor page table */
-        vc->arch.monitor_table = pagetable_null();
-
         memset(&vc->arch.hvm_vmx, 0, sizeof(struct arch_vmx_struct));
 
         if ( (rc = vmx_create_vmcs(vc)) != 0 )
@@ -107,6 +109,7 @@ static int vmx_initialize_guest_resource
 
         vc->arch.hvm_vmx.io_bitmap_a = io_bitmap_a;
         vc->arch.hvm_vmx.io_bitmap_b = io_bitmap_b;
+
     }
 
     /*
@@ -116,11 +119,6 @@ static int vmx_initialize_guest_resource
     memset(&d->shared_info->evtchn_mask[0], 0xff,
            sizeof(d->shared_info->evtchn_mask));
 
-    /* Put the domain in shadow mode even though we're going to be using
-     * the shared 1:1 page table initially. It shouldn't hurt */
-    shadow_mode_enable(
-        d, SHM_enable|SHM_refcounts|SHM_translate|SHM_external|SHM_wr_pt_pte);
-
     return 1;
 }
 
@@ -133,8 +131,7 @@ static void vmx_relinquish_guest_resourc
         vmx_destroy_vmcs(v);
         if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
             continue;
-        free_monitor_pagetable(v);
-        kill_timer(&v->arch.hvm_vmx.hlt_timer);
+        kill_timer(&v->arch.hvm_vcpu.hlt_timer);
         if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
         {
             kill_timer(&VLAPIC(v)->vlapic_timer);
@@ -149,12 +146,10 @@ static void vmx_relinquish_guest_resourc
 
     if ( d->arch.hvm_domain.shared_page_va )
         unmap_domain_page_global(
-               (void *)d->arch.hvm_domain.shared_page_va);
+            (void *)d->arch.hvm_domain.shared_page_va);
 
     if ( d->arch.hvm_domain.buffered_io_va )
         unmap_domain_page_global((void *)d->arch.hvm_domain.buffered_io_va);
-
-    shadow_direct_map_clean(d);
 }
 
 #ifdef __x86_64__
@@ -496,7 +491,7 @@ void vmx_migrate_timers(struct vcpu *v)
 
     if ( pt->enabled ) {
         migrate_timer(&pt->timer, v->processor);
-        migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+        migrate_timer(&v->arch.hvm_vcpu.hlt_timer, v->processor);
     }
     if ( hvm_apic_support(v->domain) && VLAPIC(v))
         migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor);
@@ -595,20 +590,12 @@ static void vmx_load_cpu_guest_regs(stru
     vmx_vmcs_exit(v);
 }
 
-static int vmx_realmode(struct vcpu *v)
-{
-    unsigned long rflags;
-
-    __vmread(GUEST_RFLAGS, &rflags);
-    return rflags & X86_EFLAGS_VM;
-}
-
 static int vmx_instruction_length(struct vcpu *v)
 {
     unsigned long inst_len;
 
     if (__vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len))
-       return 0;
+        return 0;
     return inst_len;
 }
 
@@ -622,6 +609,8 @@ static unsigned long vmx_get_ctrl_reg(st
         return v->arch.hvm_vmx.cpu_cr2;
     case 3:
         return v->arch.hvm_vmx.cpu_cr3;
+    case 4:
+        return v->arch.hvm_vmx.cpu_shadow_cr4;
     default:
         BUG();
     }
@@ -753,8 +742,12 @@ static void vmx_setup_hvm_funcs(void)
 
     hvm_funcs.realmode = vmx_realmode;
     hvm_funcs.paging_enabled = vmx_paging_enabled;
+    hvm_funcs.long_mode_enabled = vmx_long_mode_enabled;
+    hvm_funcs.guest_x86_mode = vmx_guest_x86_mode;
     hvm_funcs.instruction_length = vmx_instruction_length;
     hvm_funcs.get_guest_ctrl_reg = vmx_get_ctrl_reg;
+
+    hvm_funcs.update_host_cr3 = vmx_update_host_cr3;
 
     hvm_funcs.stts = vmx_stts;
     hvm_funcs.set_tsc_offset = vmx_set_tsc_offset;
@@ -855,53 +848,25 @@ static void inline __update_guest_eip(un
     __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
 }
 
-
 static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
 {
-    unsigned long gpa; /* FIXME: PAE */
     int result;
 
 #if 0 /* keep for debugging */
     {
-        unsigned long eip;
-
+        unsigned long eip, cs;
+
+        __vmread(GUEST_CS_BASE, &cs);
         __vmread(GUEST_RIP, &eip);
         HVM_DBG_LOG(DBG_LEVEL_VMMU,
-                    "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
-                    va, eip, (unsigned long)regs->error_code);
+                    "vmx_do_page_fault = 0x%lx, cs_base=%lx, "
+                    "eip = %lx, error_code = %lx\n",
+                    va, cs, eip, (unsigned long)regs->error_code);
     }
 #endif
 
-    if ( !vmx_paging_enabled(current) )
-    {
-        /* construct 1-to-1 direct mapping */
-        if ( shadow_direct_map_fault(va, regs) ) 
-            return 1;
-
-        handle_mmio(va, va);
-        TRACE_VMEXIT (2,2);
-        return 1;
-    }
-    gpa = gva_to_gpa(va);
-
-    /* Use 1:1 page table to identify MMIO address space */
-    if ( mmio_space(gpa) ){
-        struct vcpu *v = current;
-        /* No support for APIC */
-        if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000) { 
-            u32 inst_len;
-            __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len));
-            __update_guest_eip(inst_len);
-            return 1;
-        }
-        TRACE_VMEXIT (2,2);
-        /* in the case of MMIO, we are more interested in gpa than in va */
-        TRACE_VMEXIT (4,gpa);
-        handle_mmio(va, gpa);
-        return 1;
-    }
-
-    result = shadow_fault(va, regs);
+    result = shadow2_fault(va, regs);
+
     TRACE_VMEXIT (2,result);
 #if 0
     if ( !result )
@@ -972,23 +937,11 @@ static void vmx_vmexit_do_cpuid(struct c
                 clear_bit(X86_FEATURE_APIC, &edx);
             }
     
-#if CONFIG_PAGING_LEVELS < 3
-            edx &= ~(bitmaskof(X86_FEATURE_PAE)  |
-                     bitmaskof(X86_FEATURE_PSE)  |
-                     bitmaskof(X86_FEATURE_PSE36));
-#else
-            if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
-            {
-                if ( v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
-                    clear_bit(X86_FEATURE_PSE36, &edx);
-                else
-                {
-                    clear_bit(X86_FEATURE_PAE, &edx);
-                    clear_bit(X86_FEATURE_PSE, &edx);
-                    clear_bit(X86_FEATURE_PSE36, &edx);
-                }
-            }
+#if CONFIG_PAGING_LEVELS >= 3
+            if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
 #endif
+                clear_bit(X86_FEATURE_PAE, &edx);
+            clear_bit(X86_FEATURE_PSE36, &edx);
 
             ebx &= NUM_THREADS_RESET_MASK;  
 
@@ -1086,7 +1039,7 @@ static void vmx_vmexit_do_invlpg(unsigne
      * We do the safest things first, then try to update the shadow
      * copying from guest
      */
-    shadow_invlpg(v, va);
+    shadow2_invlpg(v, va);
 }
 
 
@@ -1141,7 +1094,7 @@ static int check_for_null_selector(unsig
 
 extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
                          unsigned long count, int size, long value,
-                        int dir, int pvalid);
+                         int dir, int pvalid);
 
 static void vmx_io_instruction(unsigned long exit_qualification,
                                unsigned long inst_len)
@@ -1307,11 +1260,8 @@ vmx_world_restore(struct vcpu *v, struct
 
     error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
 
-    if (!vmx_paging_enabled(v)) {
-        HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
-        __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
+    if (!vmx_paging_enabled(v))
         goto skip_cr3;
-    }
 
     if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
         /*
@@ -1325,7 +1275,6 @@ vmx_world_restore(struct vcpu *v, struct
             domain_crash_synchronous();
             return 0;
         }
-        shadow_sync_all(v->domain);
     } else {
         /*
          * If different, make a shadow. Check if the PDBR is valid
@@ -1348,12 +1297,16 @@ vmx_world_restore(struct vcpu *v, struct
          * arch.shadow_table should now hold the next CR3 for shadow
          */
         v->arch.hvm_vmx.cpu_cr3 = c->cr3;
-        update_pagetables(v);
+    }
+
+ skip_cr3:
+
+    shadow2_update_paging_modes(v);
+    if (!vmx_paging_enabled(v))
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
+    else
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
-        __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
-    }
-
- skip_cr3:
+    __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
 
     error |= __vmread(CR4_READ_SHADOW, &old_cr4);
     error |= __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
@@ -1485,6 +1438,7 @@ static int vmx_set_cr0(unsigned long val
     int paging_enabled;
     unsigned long vm_entry_value;
     unsigned long old_cr0;
+    unsigned long old_base_mfn;
 
     /*
      * CR0: We don't want to lose PE and PG.
@@ -1514,7 +1468,8 @@ static int vmx_set_cr0(unsigned long val
             v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
              !get_page(mfn_to_page(mfn), v->domain) )
         {
-            printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3);
+            printk("Invalid CR3 value = %lx (mfn=%lx)\n", 
+                   v->arch.hvm_vmx.cpu_cr3, mfn);
             domain_crash_synchronous(); /* need to take a clean path */
         }
 
@@ -1539,51 +1494,22 @@ static int vmx_set_cr0(unsigned long val
             __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
             vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
             __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
-
-            if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L4) )
-            {
-                printk("Unsupported guest paging levels\n");
-                domain_crash_synchronous(); /* need to take a clean path */
-            }
-        }
-        else
-#endif  /* __x86_64__ */
-        {
-#if CONFIG_PAGING_LEVELS >= 3
-            /* seems it's a 32-bit or 32-bit PAE guest */
-
-            if ( test_bit(VMX_CPU_STATE_PAE_ENABLED,
-                        &v->arch.hvm_vmx.cpu_state) )
-            {
-                /* The guest enables PAE first and then it enables PG, it is
-                 * really a PAE guest */
-                if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
-                {
-                    printk("Unsupported guest paging levels\n");
-                    domain_crash_synchronous();
-                }
-            }
-            else
-            {
-                if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) )
-                {
-                    printk("Unsupported guest paging levels\n");
-                    domain_crash_synchronous(); /* need to take a clean path */
-                }
-            }
+        }
 #endif
-        }
 
         /*
          * Now arch.guest_table points to machine physical.
          */
+        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
         v->arch.guest_table = pagetable_from_pfn(mfn);
-        update_pagetables(v);
+        if (old_base_mfn)
+            put_page(mfn_to_page(old_base_mfn));
+        shadow2_update_paging_modes(v);
 
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
                     (unsigned long) (mfn << PAGE_SHIFT));
 
-        __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
+        __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
         /*
          * arch->shadow_table should hold the next CR3 for shadow
          */
@@ -1625,7 +1551,6 @@ static int vmx_set_cr0(unsigned long val
             }
         }
 
-        clear_all_shadow_status(v->domain);
         if ( vmx_assist(v, VMX_ASSIST_INVOKE) ) {
             set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.hvm_vmx.cpu_state);
             __vmread(GUEST_RIP, &eip);
@@ -1651,9 +1576,8 @@ static int vmx_set_cr0(unsigned long val
     }
     else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
     {
-        /* we should take care of this kind of situation */
-        clear_all_shadow_status(v->domain);
-        __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
+        __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
+        shadow2_update_paging_modes(v);
     }
 
     return 1;
@@ -1738,7 +1662,7 @@ static int mov_to_cr(int gp, int cr, str
             mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
             if (mfn != pagetable_get_pfn(v->arch.guest_table))
                 __hvm_bug(regs);
-            shadow_sync_all(v->domain);
+            shadow2_update_cr3(v);
         } else {
             /*
              * If different, make a shadow. Check if the PDBR is valid
@@ -1759,16 +1683,11 @@ static int mov_to_cr(int gp, int cr, str
             /*
              * arch.shadow_table should now hold the next CR3 for shadow
              */
-#if CONFIG_PAGING_LEVELS >= 3
-            if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 )
-                shadow_sync_all(v->domain);
-#endif
-
             v->arch.hvm_vmx.cpu_cr3 = value;
-            update_pagetables(v);
+            update_cr3(v);
             HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
                         value);
-            __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
+            __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
         }
         break;
     }
@@ -1785,12 +1704,6 @@ static int mov_to_cr(int gp, int cr, str
                 /* The guest is a 32-bit PAE guest. */
 #if CONFIG_PAGING_LEVELS >= 3
                 unsigned long mfn, old_base_mfn;
-
-                if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
-                {
-                    printk("Unsupported guest paging levels\n");
-                    domain_crash_synchronous(); /* need to take a clean path */
-                }
 
                 if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
                                     v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
@@ -1800,21 +1713,20 @@ static int mov_to_cr(int gp, int cr, str
                     domain_crash_synchronous(); /* need to take a clean path */
                 }
 
-                old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-                if ( old_base_mfn )
-                    put_page(mfn_to_page(old_base_mfn));
 
                 /*
                  * Now arch.guest_table points to machine physical.
                  */
 
+                old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
                 v->arch.guest_table = pagetable_from_pfn(mfn);
-                update_pagetables(v);
+                if ( old_base_mfn )
+                    put_page(mfn_to_page(old_base_mfn));
 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
                             (unsigned long) (mfn << PAGE_SHIFT));
 
-                __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
+                __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
 
                 /*
                  * arch->shadow_table should hold the next CR3 for shadow
@@ -1822,27 +1734,6 @@ static int mov_to_cr(int gp, int cr, str
 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
                             v->arch.hvm_vmx.cpu_cr3, mfn);
-#endif
-            }
-            else
-            {
-                /*  The guest is a 64 bit or 32-bit PAE guest. */
-#if CONFIG_PAGING_LEVELS >= 3
-                if ( (v->domain->arch.ops != NULL) &&
-                        v->domain->arch.ops->guest_paging_levels == PAGING_L2)
-                {
-                    /* Seems the guest first enables PAE without enabling PG,
-                     * it must enable PG after that, and it is a 32-bit PAE
-                     * guest */
-
-                    if ( !shadow_set_guest_paging_levels(v->domain,
-                                                            PAGING_L3) )
-                    {
-                        printk("Unsupported guest paging levels\n");
-                        /* need to take a clean path */
-                        domain_crash_synchronous();
-                    }
-                }
 #endif
             }
         }
@@ -1864,8 +1755,7 @@ static int mov_to_cr(int gp, int cr, str
          * all TLB entries except global entries.
          */
         if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
-            shadow_sync_all(v->domain);
-
+            shadow2_update_paging_modes(v);
         break;
     }
     default:
@@ -2049,23 +1939,11 @@ static inline void vmx_do_msr_write(stru
                 (unsigned long)regs->edx);
 }
 
-/*
- * Need to use this exit to reschedule
- */
 void vmx_vmexit_do_hlt(void)
 {
-    struct vcpu *v=current;
-    struct periodic_time *pt = &(v->domain->arch.hvm_domain.pl_time.periodic_tm);
-    s_time_t   next_pit=-1,next_wakeup;
-
-    if ( !v->vcpu_id )
-        next_pit = get_scheduled(v, pt->irq, pt);
-    next_wakeup = get_apictime_scheduled(v);
-    if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
-        next_wakeup = next_pit;
-    if ( next_wakeup != - 1 ) 
-        set_timer(&current->arch.hvm_vmx.hlt_timer, next_wakeup);
-    do_sched_op_compat(SCHEDOP_block, 0);
+    unsigned long rflags;
+    __vmread(GUEST_RFLAGS, &rflags);
+    hvm_hlt(rflags);
 }
 
 static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs)
@@ -2395,8 +2273,6 @@ asmlinkage void vmx_vmexit_handler(struc
     case EXIT_REASON_DR_ACCESS:
         __vmread(EXIT_QUALIFICATION, &exit_qualification);
         vmx_dr_access(exit_qualification, &regs);
-        __get_instruction_length(inst_len);
-        __update_guest_eip(inst_len);
         break;
     case EXIT_REASON_IO_INSTRUCTION:
         __vmread(EXIT_QUALIFICATION, &exit_qualification);
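
Editorial note: both svm_vmexit_do_hlt() (earlier in this changeset) and vmx_vmexit_do_hlt() above drop their private PIT/APIC wakeup arithmetic and defer to the common hvm_hlt(rflags) helper; the SVM variant also returns immediately, without blocking, when interrupts are enabled and one is already pending. A self-contained sketch of that gate is below; irq_pending() and block_vcpu() are hypothetical stand-ins for the vintr/cpu_has_pending_irq() test and for whatever hvm_hlt() ultimately does to put the vcpu to sleep.

    #include <stdbool.h>
    #include <stdint.h>

    #define X86_EFLAGS_IF 0x200u           /* interrupt-enable flag */

    static bool irq_pending(void)  { return false; }  /* stand-in */
    static void block_vcpu(void)   { /* would sleep until an event arrives */ }

    static void vmexit_do_hlt(uint64_t rflags)
    {
        /* If the guest can take an interrupt and one is already pending,
         * HLT should complete immediately; do not put the vcpu to sleep. */
        if ((rflags & X86_EFLAGS_IF) && irq_pending())
            return;

        block_vcpu();                      /* otherwise block until woken */
    }
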
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vmx/x86_32/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_32/exits.S       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S       Sun Aug 20 11:08:45 2006 -0400
@@ -55,7 +55,7 @@
  * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
  */
 
-#define NR_SKIPPED_REGS        6       /* See the above explanation */
+#define NR_SKIPPED_REGS 6 /* See the above explanation */
 #define HVM_SAVE_ALL_NOSEGREGS                                              \
         subl $(NR_SKIPPED_REGS*4), %esp;                                    \
         movl $0, 0xc(%esp);  /* XXX why do we need to force eflags==0 ?? */ \
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/hvm/vmx/x86_64/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_64/exits.S       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S       Sun Aug 20 11:08:45 2006 -0400
@@ -51,7 +51,7 @@
  * (2/1)  u32 entry_vector;
  * (1/1)  u32 error_code;
  */
-#define NR_SKIPPED_REGS        6       /* See the above explanation */
+#define NR_SKIPPED_REGS 6 /* See the above explanation */
 #define HVM_SAVE_ALL_NOSEGREGS                  \
         subq $(NR_SKIPPED_REGS*8), %rsp;        \
         pushq %rdi;                             \
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/i387.c
--- a/xen/arch/x86/i387.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/i387.c       Sun Aug 20 11:08:45 2006 -0400
@@ -5,7 +5,7 @@
  *
  *  Pentium III FXSR, SSE support
  *  General FPU state handling cleanups
- *     Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000
+ *  Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000
  */
 
 #include <xen/config.h>
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/io_apic.c    Sun Aug 20 11:08:45 2006 -0400
@@ -639,7 +639,7 @@ static int pin_2_irq(int idx, int apic, 
     }
     default:
     {
-        printk(KERN_ERR "unknown bus type %d.\n",bus); 
+        printk(KERN_ERR "unknown bus type %d.\n",bus);
         irq = 0;
         break;
     }
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c  Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/microcode.c  Sun Aug 20 11:08:45 2006 -0400
@@ -152,7 +152,7 @@ static void collect_cpu_info (void *unus
        unsigned int val[2];
 
        uci->sig = uci->pf = uci->rev = uci->cksum = 0;
-       uci->err = MC_NOTFOUND; 
+       uci->err = MC_NOTFOUND;
        uci->mc = NULL;
 
        if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
@@ -460,7 +460,7 @@ int microcode_update(void *buf, unsigned
        int ret;
 
        if (len < DEFAULT_UCODE_TOTALSIZE) {
-               printk(KERN_ERR "microcode: not enough data\n"); 
+               printk(KERN_ERR "microcode: not enough data\n");
                return -EINVAL;
        }
 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/mm.c Sun Aug 20 11:08:45 2006 -0400
@@ -137,7 +137,7 @@ static void free_l1_table(struct page_in
 
 static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long,
                         unsigned long type);
-static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
+static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t, unsigned long gl1mfn);
 
 /* Used to defer flushing of memory structures. */
 struct percpu_mm_info {
@@ -234,6 +234,21 @@ void arch_init_memory(void)
     subarch_init_memory();
 }
 
+int memory_is_conventional_ram(paddr_t p)
+{
+    int i;
+
+    for ( i = 0; i < e820.nr_map; i++ )
+    {
+        if ( (e820.map[i].type == E820_RAM) &&
+             (e820.map[i].addr <= p) &&
+             (e820.map[i].size > p) )
+            return 1;
+    }
+
+    return 0;
+}
+
 void share_xen_page_with_guest(
     struct page_info *page, struct domain *d, int readonly)
 {
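
Editorial note: the new memory_is_conventional_ram() above answers "is this physical address backed by ordinary RAM?" by scanning the e820 map for an E820_RAM entry covering the address. Note that the added code tests e820.map[i].size > p rather than addr + size > p, which appears to miss RAM regions that do not start at (or near) address zero. The stand-alone sketch below uses the usual half-open range test and a made-up map; it is illustrative only, not Xen's e820 structures.

    #include <stdint.h>

    #define E820_RAM 1

    struct e820entry { uint64_t addr, size; uint32_t type; };

    static const struct e820entry map[] = {     /* made-up example map */
        { 0x0000000000000000ULL, 0x000000000009fc00ULL, E820_RAM },
        { 0x0000000000100000ULL, 0x000000007ff00000ULL, E820_RAM },
    };

    static int is_conventional_ram(uint64_t p)
    {
        unsigned int i;

        for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
            if (map[i].type == E820_RAM &&
                map[i].addr <= p && p < map[i].addr + map[i].size)
                return 1;                       /* p falls inside a RAM range */

        return 0;
    }
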
@@ -274,9 +289,9 @@ void share_xen_page_with_privileged_gues
 #else
 /*
  * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
- * We cannot safely shadow the idle page table, nor shadow-mode page tables
+ * We cannot safely shadow the idle page table, nor shadow (v1) page tables
  * (detected by lack of an owning domain). As required for correctness, we
- * always shadow PDPTs aboive 4GB.
+ * always shadow PDPTs above 4GB.
  */
 #define l3tab_needs_shadow(mfn)                         \
     (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
@@ -297,17 +312,21 @@ static int __init cache_pae_fixmap_addre
 }
 __initcall(cache_pae_fixmap_address);
 
-static void __write_ptbase(unsigned long mfn)
+static DEFINE_PER_CPU(u32, make_cr3_timestamp);
+
+void make_cr3(struct vcpu *v, unsigned long mfn)
+/* Takes the MFN of a PAE l3 table, copies the contents to below 4GB if
+ * necessary, and sets v->arch.cr3 to the value to load in CR3. */
 {
     l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
-    struct pae_l3_cache *cache = &current->arch.pae_l3_cache;
+    struct pae_l3_cache *cache = &v->arch.pae_l3_cache;
     unsigned int cpu = smp_processor_id();
 
-    /* Fast path 1: does this mfn need a shadow at all? */
+    /* Fast path: does this mfn need a shadow at all? */
     if ( !l3tab_needs_shadow(mfn) )
     {
-        write_cr3(mfn << PAGE_SHIFT);
-        /* Cache is no longer in use or valid (/after/ write to %cr3). */
+        v->arch.cr3 = mfn << PAGE_SHIFT;
+        /* Cache is no longer in use or valid */
         cache->high_mfn = 0;
         return;
     }
@@ -315,13 +334,6 @@ static void __write_ptbase(unsigned long
     /* Caching logic is not interrupt safe. */
     ASSERT(!in_irq());
 
-    /* Fast path 2: is this mfn already cached? */
-    if ( cache->high_mfn == mfn )
-    {
-        write_cr3(__pa(cache->table[cache->inuse_idx]));
-        return;
-    }
-
     /* Protects against pae_flush_pgd(). */
     spin_lock(&cache->lock);
 
@@ -330,29 +342,33 @@ static void __write_ptbase(unsigned long
 
     /* Map the guest L3 table and copy to the chosen low-memory cache. */
     *(fix_pae_highmem_pl1e - cpu) = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
+    /* First check the previous high mapping can't be in the TLB. 
+     * (i.e. have we loaded CR3 since we last did this?) */
+    if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) )
+        local_flush_tlb_one(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu));
     highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
     lowmem_l3tab  = cache->table[cache->inuse_idx];
     memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
     *(fix_pae_highmem_pl1e - cpu) = l1e_empty();
-
-    /* Install the low-memory L3 table in CR3. */
-    write_cr3(__pa(lowmem_l3tab));
+    this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);
+
+    v->arch.cr3 = __pa(lowmem_l3tab);
 
     spin_unlock(&cache->lock);
 }
 
 #else /* !CONFIG_X86_PAE */
 
-static void __write_ptbase(unsigned long mfn)
-{
-    write_cr3(mfn << PAGE_SHIFT);
+void make_cr3(struct vcpu *v, unsigned long mfn)
+{
+    v->arch.cr3 = mfn << PAGE_SHIFT;
 }
 
 #endif /* !CONFIG_X86_PAE */
 
 void write_ptbase(struct vcpu *v)
 {
-    __write_ptbase(pagetable_get_pfn(v->arch.monitor_table));
+    write_cr3(v->arch.cr3);
 }
 
 void invalidate_shadow_ldt(struct vcpu *v)
@@ -423,8 +439,6 @@ int map_ldt_shadow_page(unsigned int off
 
     BUG_ON(unlikely(in_irq()));
 
-    shadow_sync_va(v, gva);
-
     TOGGLE_MODE();
     __copy_from_user(&l1e, &linear_pg_table[l1_linear_offset(gva)],
                      sizeof(l1e));
@@ -440,12 +454,12 @@ int map_ldt_shadow_page(unsigned int off
 
     res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
 
-    if ( !res && unlikely(shadow_mode_refcounts(d)) )
-    {
-        shadow_lock(d);
-        shadow_remove_all_write_access(d, gmfn, mfn);
+    if ( !res && unlikely(shadow2_mode_refcounts(d)) )
+    {
+        shadow2_lock(d);
+        shadow2_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0);
         res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
-        shadow_unlock(d);
+        shadow2_unlock(d);
     }
 
     if ( unlikely(!res) )
@@ -513,7 +527,7 @@ get_linear_pagetable(
     struct page_info *page;
     unsigned long pfn;
 
-    ASSERT( !shadow_mode_refcounts(d) );
+    ASSERT( !shadow2_mode_refcounts(d) );
 
     if ( (root_get_flags(re) & _PAGE_RW) )
     {
@@ -576,7 +590,8 @@ get_page_from_l1e(
 
         if ( !iomem_access_permitted(d, mfn, mfn) )
         {
-            MEM_LOG("Non-privileged attempt to map I/O space %08lx", mfn);
+            MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx", 
+                    d->domain_id, mfn);
             return 0;
         }
 
@@ -587,9 +602,14 @@ get_page_from_l1e(
         d = dom_io;
     }
 
-    okay = ((l1e_get_flags(l1e) & _PAGE_RW) ?
-            get_page_and_type(page, d, PGT_writable_page) :
-            get_page(page, d));
+    /* Foreign mappings into guests in shadow2 external mode don't
+     * contribute to writeable mapping refcounts.  (This allows the
+     * qemu-dm helper process in dom0 to map the domain's memory without
+     * messing up the count of "real" writable mappings.) */
+    okay = (((l1e_get_flags(l1e) & _PAGE_RW) && 
+             !(unlikely(shadow2_mode_external(d) && (d != current->domain))))
+            ? get_page_and_type(page, d, PGT_writable_page)
+            : get_page(page, d));
     if ( !okay )
     {
         MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -609,8 +629,6 @@ get_page_from_l2e(
     struct domain *d, unsigned long vaddr)
 {
     int rc;
-
-    ASSERT(!shadow_mode_refcounts(d));
 
     if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
         return 1;
@@ -641,8 +659,6 @@ get_page_from_l3e(
 {
     int rc;
 
-    ASSERT(!shadow_mode_refcounts(d));
-
     if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
         return 1;
 
@@ -668,8 +684,6 @@ get_page_from_l4e(
     struct domain *d, unsigned long vaddr)
 {
     int rc;
-
-    ASSERT( !shadow_mode_refcounts(d) );
 
     if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
         return 1;
@@ -727,7 +741,10 @@ void put_page_from_l1e(l1_pgentry_t l1e,
         domain_crash(d);
     }
 
-    if ( l1e_get_flags(l1e) & _PAGE_RW )
+    /* Remember we didn't take a type-count of foreign writable mappings
+     * to shadow2 external domains */
+    if ( (l1e_get_flags(l1e) & _PAGE_RW) && 
+         !(unlikely((e != d) && shadow2_mode_external(e))) )
     {
         put_page_and_type(page);
     }
@@ -784,7 +801,7 @@ static int alloc_l1_table(struct page_in
     l1_pgentry_t  *pl1e;
     int            i;
 
-    ASSERT(!shadow_mode_refcounts(d));
+    ASSERT(!shadow2_mode_refcounts(d));
 
     pl1e = map_domain_page(pfn);
 
@@ -832,6 +849,8 @@ static int create_pae_xen_mappings(l3_pg
      *  2. Cannot appear in another page table's L3:
      *     a. alloc_l3_table() calls this function and this check will fail
      *     b. mod_l3_entry() disallows updates to slot 3 in an existing table
+     *
+     * XXX -- this needs revisiting for shadow2_mode_refcount()==true...
      */
     page = l3e_get_page(l3e3);
     BUG_ON(page->u.inuse.type_info & PGT_pinned);
@@ -955,11 +974,7 @@ static int alloc_l2_table(struct page_in
     l2_pgentry_t  *pl2e;
     int            i;
 
-    /* See the code in shadow_promote() to understand why this is here. */
-    if ( (PGT_base_page_table == PGT_l2_page_table) &&
-         unlikely(shadow_mode_refcounts(d)) )
-        return 1;
-    ASSERT(!shadow_mode_refcounts(d));
+    ASSERT(!shadow2_mode_refcounts(d));
     
     pl2e = map_domain_page(pfn);
 
@@ -1009,11 +1024,7 @@ static int alloc_l3_table(struct page_in
     l3_pgentry_t  *pl3e;
     int            i;
 
-    /* See the code in shadow_promote() to understand why this is here. */
-    if ( (PGT_base_page_table == PGT_l3_page_table) &&
-         shadow_mode_refcounts(d) )
-        return 1;
-    ASSERT(!shadow_mode_refcounts(d));
+    ASSERT(!shadow2_mode_refcounts(d));
 
 #ifdef CONFIG_X86_PAE
     /*
@@ -1072,11 +1083,7 @@ static int alloc_l4_table(struct page_in
     unsigned long vaddr;
     int            i;
 
-    /* See the code in shadow_promote() to understand why this is here. */
-    if ( (PGT_base_page_table == PGT_l4_page_table) &&
-         shadow_mode_refcounts(d) )
-        return 1;
-    ASSERT(!shadow_mode_refcounts(d));
+    ASSERT(!shadow2_mode_refcounts(d));
 
     for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
     {
@@ -1183,51 +1190,61 @@ static void free_l4_table(struct page_in
 
 static inline int update_l1e(l1_pgentry_t *pl1e, 
                              l1_pgentry_t  ol1e, 
-                             l1_pgentry_t  nl1e)
-{
+                             l1_pgentry_t  nl1e,
+                             unsigned long gl1mfn,
+                             struct vcpu *v)
+{
+    int rv = 1;
+    if ( unlikely(shadow2_mode_enabled(v->domain)) )
+        shadow2_lock(v->domain);
 #ifndef PTE_UPDATE_WITH_CMPXCHG
-    return !__copy_to_user(pl1e, &nl1e, sizeof(nl1e));
+    rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e)));
 #else
-    intpte_t o = l1e_get_intpte(ol1e);
-    intpte_t n = l1e_get_intpte(nl1e);
-
-    for ( ; ; )
-    {
-        if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) )
-        {
-            MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
-                    ": saw %" PRIpte,
-                    l1e_get_intpte(ol1e),
-                    l1e_get_intpte(nl1e),
-                    o);
-            return 0;
-        }
-
-        if ( o == l1e_get_intpte(ol1e) )
-            break;
-
-        /* Allowed to change in Accessed/Dirty flags only. */
-        BUG_ON((o ^ l1e_get_intpte(ol1e)) &
-               ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));
-        ol1e = l1e_from_intpte(o);
-    }
-
-    return 1;
+    {
+        intpte_t o = l1e_get_intpte(ol1e);
+        intpte_t n = l1e_get_intpte(nl1e);
+        
+        for ( ; ; )
+        {
+            if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) )
+            {
+                MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
+                        ": saw %" PRIpte,
+                        l1e_get_intpte(ol1e),
+                        l1e_get_intpte(nl1e),
+                        o);
+                rv = 0;
+                break;
+            }
+
+            if ( o == l1e_get_intpte(ol1e) )
+                break;
+
+            /* Allowed to change in Accessed/Dirty flags only. */
+            BUG_ON((o ^ l1e_get_intpte(ol1e)) &
+                   ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY));
+            ol1e = l1e_from_intpte(o);
+        }
+    }
 #endif
+    if ( unlikely(shadow2_mode_enabled(v->domain)) )
+    {
+        shadow2_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
+        shadow2_unlock(v->domain);    
+    }
+    return rv;
 }
 
 
 /* Update the L1 entry at pl1e to new value nl1e. */
-static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e)
+static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, 
+                        unsigned long gl1mfn)
 {
     l1_pgentry_t ol1e;
     struct domain *d = current->domain;
 
     if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
         return 0;
-
-    if ( unlikely(shadow_mode_refcounts(d)) )
-        return update_l1e(pl1e, ol1e, nl1e);
 
     if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
     {
@@ -1239,13 +1256,13 @@ static int mod_l1_entry(l1_pgentry_t *pl
         }
 
         /* Fast path for identical mapping, r/w and presence. */
-        if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT))
-            return update_l1e(pl1e, ol1e, nl1e);
+        if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
+            return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current);
 
         if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
             return 0;
         
-        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
+        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) )
         {
             put_page_from_l1e(nl1e, d);
             return 0;
@@ -1253,7 +1270,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
     }
     else
     {
-        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
+        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) )
             return 0;
     }
 
@@ -1262,9 +1279,9 @@ static int mod_l1_entry(l1_pgentry_t *pl
 }
 
 #ifndef PTE_UPDATE_WITH_CMPXCHG
-#define UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; })
+#define _UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; })
 #else
-#define UPDATE_ENTRY(_t,_p,_o,_n) ({                            \
+#define _UPDATE_ENTRY(_t,_p,_o,_n) ({                            \
     for ( ; ; )                                                 \
     {                                                           \
         intpte_t __o = cmpxchg((intpte_t *)(_p),                \
@@ -1279,6 +1296,18 @@ static int mod_l1_entry(l1_pgentry_t *pl
     }                                                           \
     1; })
 #endif
+#define UPDATE_ENTRY(_t,_p,_o,_n,_m)  ({                            \
+    int rv;                                                         \
+    if ( unlikely(shadow2_mode_enabled(current->domain)) )          \
+        shadow2_lock(current->domain);                              \
+    rv = _UPDATE_ENTRY(_t, _p, _o, _n);                             \
+    if ( unlikely(shadow2_mode_enabled(current->domain)) )          \
+    {                                                               \
+        shadow2_validate_guest_entry(current, _mfn(_m), (_p));      \
+        shadow2_unlock(current->domain);                            \
+    }                                                               \
+    rv;                                                             \
+})
 
 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
 static int mod_l2_entry(l2_pgentry_t *pl2e, 
@@ -1309,19 +1338,19 @@ static int mod_l2_entry(l2_pgentry_t *pl
 
         /* Fast path for identical mapping and presence. */
         if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
-            return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e);
+            return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn);
 
         if ( unlikely(!l1_backptr(&vaddr, pgentry_ptr_to_slot(pl2e), type)) ||
              unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) )
             return 0;
 
-        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
+        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) )
         {
             put_page_from_l2e(nl2e, pfn);
             return 0;
         }
     }
-    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
+    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) )
     {
         return 0;
     }
@@ -1329,7 +1358,6 @@ static int mod_l2_entry(l2_pgentry_t *pl
     put_page_from_l2e(ol2e, pfn);
     return 1;
 }
-
 
 #if CONFIG_PAGING_LEVELS >= 3
 
@@ -1356,7 +1384,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
      */
     if ( pgentry_ptr_to_slot(pl3e) >= 3 )
         return 0;
-#endif
+#endif 
 
     if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
         return 0;
@@ -1372,26 +1400,26 @@ static int mod_l3_entry(l3_pgentry_t *pl
 
         /* Fast path for identical mapping and presence. */
         if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
-            return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e);
+            return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn);
 
 #if CONFIG_PAGING_LEVELS >= 4
         if ( unlikely(!l2_backptr(&vaddr, pgentry_ptr_to_slot(pl3e), type)) ||
              unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
-            return 0; 
+            return 0;
 #else
         vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t))
             << L3_PAGETABLE_SHIFT;
         if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
             return 0;
-#endif
-
-        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
+#endif 
+
+        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) )
         {
             put_page_from_l3e(nl3e, pfn);
             return 0;
         }
     }
-    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
+    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) )
     {
         return 0;
     }
@@ -1438,19 +1466,19 @@ static int mod_l4_entry(l4_pgentry_t *pl
 
         /* Fast path for identical mapping and presence. */
         if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
-            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e);
+            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn);
 
         if ( unlikely(!l3_backptr(&vaddr, pgentry_ptr_to_slot(pl4e), type)) ||
              unlikely(!get_page_from_l4e(nl4e, pfn, current->domain, vaddr)) )
             return 0;
 
-        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
+        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) )
         {
             put_page_from_l4e(nl4e, pfn);
             return 0;
         }
     }
-    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
+    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) )
     {
         return 0;
     }
@@ -1506,18 +1534,21 @@ void free_page_type(struct page_info *pa
          */
         this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
 
-        if ( unlikely(shadow_mode_enabled(owner)) )
+        if ( unlikely(shadow2_mode_enabled(owner)
+                 && !shadow2_lock_is_acquired(owner)) )
         {
             /* Raw page tables are rewritten during save/restore. */
-            if ( !shadow_mode_translate(owner) )
+            if ( !shadow2_mode_translate(owner) )
                 mark_dirty(owner, page_to_mfn(page));
 
-            if ( shadow_mode_refcounts(owner) )
+            if ( shadow2_mode_refcounts(owner) )
                 return;
 
             gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
             ASSERT(VALID_M2P(gmfn));
-            remove_shadow(owner, gmfn, type & PGT_type_mask);
+            shadow2_lock(owner);
+            shadow2_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
+            shadow2_unlock(owner);
         }
     }
 
@@ -1573,9 +1604,6 @@ void put_page_type(struct page_info *pag
 
         if ( unlikely((nx & PGT_count_mask) == 0) )
         {
-            /* Record TLB information for flush later. Races are harmless. */
-            page->tlbflush_timestamp = tlbflush_current_time();
-            
             if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
                  likely(nx & PGT_validated) )
             {
@@ -1593,6 +1621,9 @@ void put_page_type(struct page_info *pag
                 x  &= ~PGT_validated;
                 nx &= ~PGT_validated;
             }
+
+            /* Record TLB information for flush later. */
+            page->tlbflush_timestamp = tlbflush_current_time();
         }
         else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == 
                            (PGT_pinned|PGT_l1_page_table|1)) )
@@ -1682,7 +1713,7 @@ int get_page_type(struct page_info *page
 #endif
                     /* Fixme: add code to propagate va_unknown to subtables. */
                     if ( ((type & PGT_type_mask) >= PGT_l2_page_table) &&
-                         !shadow_mode_refcounts(page_get_owner(page)) )
+                         !shadow2_mode_refcounts(page_get_owner(page)) )
                         return 0;
                     /* This table is possibly mapped at multiple locations. */
                     nx &= ~PGT_va_mask;
@@ -1729,7 +1760,10 @@ int new_guest_cr3(unsigned long mfn)
     int okay;
     unsigned long old_base_mfn;
 
-    if ( shadow_mode_refcounts(d) )
+    if ( hvm_guest(v) && !hvm_paging_enabled(v) )
+        domain_crash_synchronous();
+
+    if ( shadow2_mode_refcounts(d) )
     {
         okay = get_page_from_pagenr(mfn, d);
         if ( unlikely(!okay) )
@@ -1747,7 +1781,7 @@ int new_guest_cr3(unsigned long mfn)
             MEM_LOG("New baseptr %lx: slow path via idle pagetables", mfn);
             old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
             v->arch.guest_table = pagetable_null();
-            update_pagetables(v);
+            update_cr3(v);
             write_cr3(__pa(idle_pg_table));
             if ( old_base_mfn != 0 )
                 put_page_and_type(mfn_to_page(old_base_mfn));
@@ -1769,30 +1803,20 @@ int new_guest_cr3(unsigned long mfn)
     invalidate_shadow_ldt(v);
 
     old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+
     v->arch.guest_table = pagetable_from_pfn(mfn);
-    update_pagetables(v); /* update shadow_table and monitor_table */
+    update_cr3(v); /* update shadow_table and cr3 fields of vcpu struct */
 
     write_ptbase(v);
 
     if ( likely(old_base_mfn != 0) )
     {
-        if ( shadow_mode_refcounts(d) )
+        if ( shadow2_mode_refcounts(d) )
             put_page(mfn_to_page(old_base_mfn));
         else
             put_page_and_type(mfn_to_page(old_base_mfn));
     }
 
-    /* CR3 also holds a ref to its shadow... */
-    if ( shadow_mode_enabled(d) )
-    {
-        if ( v->arch.monitor_shadow_ref )
-            put_shadow_ref(v->arch.monitor_shadow_ref);
-        v->arch.monitor_shadow_ref =
-            pagetable_get_pfn(v->arch.monitor_table);
-        ASSERT(!page_get_owner(mfn_to_page(v->arch.monitor_shadow_ref)));
-        get_shadow_ref(v->arch.monitor_shadow_ref);
-    }
-
     return 1;
 }
 
@@ -1807,8 +1831,6 @@ static void process_deferred_ops(void)
 
     if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) )
     {
-        if ( shadow_mode_enabled(d) )
-            shadow_sync_all(d);
         if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
             flush_tlb_mask(d->domain_dirty_cpumask);
         else
@@ -1974,7 +1996,7 @@ int do_mmuext_op(
             type = PGT_root_page_table;
 
         pin_page:
-            if ( shadow_mode_refcounts(FOREIGNDOM) )
+            if ( shadow2_mode_refcounts(FOREIGNDOM) )
                 break;
 
             okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
@@ -1996,7 +2018,7 @@ int do_mmuext_op(
             break;
 
         case MMUEXT_UNPIN_TABLE:
-            if ( shadow_mode_refcounts(d) )
+            if ( shadow2_mode_refcounts(d) )
                 break;
 
             if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) )
@@ -2009,6 +2031,12 @@ int do_mmuext_op(
             {
                 put_page_and_type(page);
                 put_page(page);
+                if ( shadow2_mode_enabled(d) )
+                {
+                    shadow2_lock(d);
+                    shadow2_remove_all_shadows(v, _mfn(mfn));
+                    shadow2_unlock(d);
+                }
             }
             else
             {
@@ -2050,9 +2078,9 @@ int do_mmuext_op(
             break;
     
         case MMUEXT_INVLPG_LOCAL:
-            if ( shadow_mode_enabled(d) )
-                shadow_invlpg(v, op.arg1.linear_addr);
-            local_flush_tlb_one(op.arg1.linear_addr);
+            if ( !shadow2_mode_enabled(d) 
+                 || shadow2_invlpg(v, op.arg1.linear_addr) != 0 )
+                local_flush_tlb_one(op.arg1.linear_addr);
             break;
 
         case MMUEXT_TLB_FLUSH_MULTI:
@@ -2098,7 +2126,7 @@ int do_mmuext_op(
             unsigned long ptr  = op.arg1.linear_addr;
             unsigned long ents = op.arg2.nr_ents;
 
-            if ( shadow_mode_external(d) )
+            if ( shadow2_mode_external(d) )
             {
                 MEM_LOG("ignoring SET_LDT hypercall from external "
                         "domain %u", d->domain_id);
@@ -2171,9 +2199,6 @@ int do_mmu_update(
 
     LOCK_BIGLOCK(d);
 
-    if ( unlikely(shadow_mode_enabled(d)) )
-        check_pagetable(v, "pre-mmu"); /* debug */
-
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
     {
         count &= ~MMU_UPDATE_PREEMPTED;
@@ -2190,7 +2215,7 @@ int do_mmu_update(
         goto out;
     }
 
-    perfc_incrc(calls_to_mmu_update); 
+    perfc_incrc(calls_to_mmu_update);
     perfc_addc(num_page_updates, count);
     perfc_incr_histo(bpt_updates, count, PT_UPDATES);
 
@@ -2248,7 +2273,12 @@ int do_mmu_update(
             case PGT_l3_page_table:
             case PGT_l4_page_table:
             {
-                ASSERT(!shadow_mode_refcounts(d));
+                if ( shadow2_mode_refcounts(d) )
+                {
+                    DPRINTK("mmu update on shadow-refcounted domain!");
+                    break;
+                }
+
                 if ( unlikely(!get_page_type(
                     page, type_info & (PGT_type_mask|PGT_va_mask))) )
                     goto not_a_pt;
@@ -2258,10 +2288,7 @@ int do_mmu_update(
                 case PGT_l1_page_table:
                 {
                     l1_pgentry_t l1e = l1e_from_intpte(req.val);
-                    okay = mod_l1_entry(va, l1e);
-                    if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l1_normal_pt_update(
-                            d, req.ptr, l1e, &sh_mapcache);
+                    okay = mod_l1_entry(va, l1e, mfn);
                 }
                 break;
                 case PGT_l2_page_table:
@@ -2269,9 +2296,6 @@ int do_mmu_update(
                     l2_pgentry_t l2e = l2e_from_intpte(req.val);
                     okay = mod_l2_entry(
                         (l2_pgentry_t *)va, l2e, mfn, type_info);
-                    if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l2_normal_pt_update(
-                            d, req.ptr, l2e, &sh_mapcache);
                 }
                 break;
 #if CONFIG_PAGING_LEVELS >= 3
@@ -2279,9 +2303,6 @@ int do_mmu_update(
                 {
                     l3_pgentry_t l3e = l3e_from_intpte(req.val);
                     okay = mod_l3_entry(va, l3e, mfn, type_info);
-                    if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l3_normal_pt_update(
-                            d, req.ptr, l3e, &sh_mapcache);
                 }
                 break;
 #endif
@@ -2290,9 +2311,6 @@ int do_mmu_update(
                 {
                     l4_pgentry_t l4e = l4e_from_intpte(req.val);
                     okay = mod_l4_entry(va, l4e, mfn, type_info);
-                    if ( okay && unlikely(shadow_mode_enabled(d)) )
-                        shadow_l4_normal_pt_update(
-                            d, req.ptr, l4e, &sh_mapcache);
                 }
                 break;
 #endif
@@ -2308,19 +2326,17 @@ int do_mmu_update(
                 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
                     break;
 
-                if ( shadow_mode_enabled(d) )
-                {
-                    shadow_lock(d);
-                    __mark_dirty(d, mfn);
-                    if ( page_is_page_table(page) && !page_out_of_sync(page) )
-                        shadow_mark_mfn_out_of_sync(v, gmfn, mfn);
-                }
+                if ( unlikely(shadow2_mode_enabled(d)) )
+                    shadow2_lock(d);
 
                 *(intpte_t *)va = req.val;
                 okay = 1;
 
-                if ( shadow_mode_enabled(d) )
-                    shadow_unlock(d);
+                if ( unlikely(shadow2_mode_enabled(d)) )
+                {
+                    shadow2_validate_guest_entry(v, _mfn(mfn), va);
+                    shadow2_unlock(d);
+                }
 
                 put_page_type(page);
             }
@@ -2333,12 +2349,6 @@ int do_mmu_update(
             break;
 
         case MMU_MACHPHYS_UPDATE:
-
-            if ( shadow_mode_translate(FOREIGNDOM) )
-            {
-                MEM_LOG("can't mutate m2p table of translate mode guest");
-                break;
-            }
 
             mfn = req.ptr >> PAGE_SHIFT;
             gpfn = req.val;
@@ -2349,9 +2359,13 @@ int do_mmu_update(
                 break;
             }
 
-            set_gpfn_from_mfn(mfn, gpfn);
+            if ( shadow2_mode_translate(FOREIGNDOM) )
+                shadow2_guest_physmap_add_page(FOREIGNDOM, gpfn, mfn);
+            else 
+                set_gpfn_from_mfn(mfn, gpfn);
             okay = 1;
 
+            // Mark the new gfn dirty...
             mark_dirty(FOREIGNDOM, mfn);
 
             put_page(mfn_to_page(mfn));
@@ -2381,9 +2395,6 @@ int do_mmu_update(
     done += i;
     if ( unlikely(!guest_handle_is_null(pdone)) )
         copy_to_guest(pdone, &done, 1);
-
-    if ( unlikely(shadow_mode_enabled(d)) )
-        check_pagetable(v, "post-mmu"); /* debug */
 
     UNLOCK_BIGLOCK(d);
     return rc;
@@ -2402,7 +2413,6 @@ static int create_grant_pte_mapping(
     struct domain *d = v->domain;
 
     ASSERT(spin_is_locked(&d->big_lock));
-    ASSERT(!shadow_mode_refcounts(d));
 
     gmfn = pte_addr >> PAGE_SHIFT;
     mfn = gmfn_to_mfn(d, gmfn);
@@ -2418,7 +2428,7 @@ static int create_grant_pte_mapping(
     page = mfn_to_page(mfn);
 
     type_info = page->u.inuse.type_info;
-    if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
+    if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||         
          !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
     {
         MEM_LOG("Grant map attempted to update a non-L1 page");
@@ -2427,28 +2437,22 @@ static int create_grant_pte_mapping(
     }
 
     ol1e = *(l1_pgentry_t *)va;
-    if ( !update_l1e(va, ol1e, _nl1e) )
+    if ( !update_l1e(va, ol1e, _nl1e, mfn, v) )
     {
         put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     } 
 
-    put_page_from_l1e(ol1e, d);
-
-    if ( unlikely(shadow_mode_enabled(d)) )
-    {
-        struct domain_mmap_cache sh_mapcache;
-        domain_mmap_cache_init(&sh_mapcache);
-        shadow_l1_normal_pt_update(d, pte_addr, _nl1e, &sh_mapcache);
-        domain_mmap_cache_destroy(&sh_mapcache);
-    }
+    if ( !shadow2_mode_refcounts(d) )
+        put_page_from_l1e(ol1e, d);
 
     put_page_type(page);
  
  failed:
     unmap_domain_page(va);
     put_page(page);
+
     return rc;
 }
 
@@ -2462,8 +2466,6 @@ static int destroy_grant_pte_mapping(
     u32 type_info;
     l1_pgentry_t ol1e;
 
-    ASSERT(!shadow_mode_refcounts(d));
-
     gmfn = addr >> PAGE_SHIFT;
     mfn = gmfn_to_mfn(d, gmfn);
 
@@ -2504,7 +2506,9 @@ static int destroy_grant_pte_mapping(
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(!update_l1e((l1_pgentry_t *)va, ol1e, l1e_empty())) )
+    if ( unlikely(!update_l1e(
+                      (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn, 
+                      d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) )
     {
         MEM_LOG("Cannot delete PTE entry at %p", va);
         put_page_type(page);
@@ -2512,14 +2516,6 @@ static int destroy_grant_pte_mapping(
         goto failed;
     }
 
-    if ( unlikely(shadow_mode_enabled(d)) )
-    {
-        struct domain_mmap_cache sh_mapcache;
-        domain_mmap_cache_init(&sh_mapcache);
-        shadow_l1_normal_pt_update(d, addr, l1e_empty(), &sh_mapcache);
-        domain_mmap_cache_destroy(&sh_mapcache);
-    }
-
     put_page_type(page);
 
  failed:
@@ -2536,31 +2532,22 @@ static int create_grant_va_mapping(
     struct domain *d = v->domain;
     
     ASSERT(spin_is_locked(&d->big_lock));
-    ASSERT(!shadow_mode_refcounts(d));
-
-    /*
-     * This is actually overkill - we don't need to sync the L1 itself,
-     * just everything involved in getting to this L1 (i.e. we need
-     * linear_pg_table[l1_linear_offset(va)] to be in sync)...
-     */
-    __shadow_sync_va(v, va);
 
     pl1e = &linear_pg_table[l1_linear_offset(va)];
 
     if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ||
-         !update_l1e(pl1e, ol1e, _nl1e) )
+         !update_l1e(pl1e, ol1e, _nl1e, 
+                    l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) )
         return GNTST_general_error;
 
-    put_page_from_l1e(ol1e, d);
-
-    if ( unlikely(shadow_mode_enabled(d)) )
-        shadow_do_update_va_mapping(va, _nl1e, v);
+    if ( !shadow2_mode_refcounts(d) )
+        put_page_from_l1e(ol1e, d);
 
     return GNTST_okay;
 }
 
 static int destroy_grant_va_mapping(
-    unsigned long addr, unsigned long frame)
+    unsigned long addr, unsigned long frame, struct domain *d)
 {
     l1_pgentry_t *pl1e, ol1e;
     
@@ -2584,12 +2571,14 @@ static int destroy_grant_va_mapping(
     }
 
     /* Delete pagetable entry. */
-    if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty())) )
+    if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(), 
+                      l2e_get_pfn(__linear_l2_table[l2_linear_offset(addr)]),
+                      d->vcpu[0] /* Change for per-vcpu shadows */)) )
     {
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         return GNTST_general_error;
     }
-    
+
     return 0;
 }
 
@@ -2597,7 +2586,7 @@ int create_grant_host_mapping(
     unsigned long addr, unsigned long frame, unsigned int flags)
 {
     l1_pgentry_t pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
-        
+
     if ( (flags & GNTMAP_application_map) )
         l1e_add_flags(pte,_PAGE_USER);
     if ( !(flags & GNTMAP_readonly) )
@@ -2613,7 +2602,7 @@ int destroy_grant_host_mapping(
 {
     if ( flags & GNTMAP_contains_pte )
         return destroy_grant_pte_mapping(addr, frame, current->domain);
-    return destroy_grant_va_mapping(addr, frame);
+    return destroy_grant_va_mapping(addr, frame, current->domain);
 }
 
 int steal_page(
@@ -2675,46 +2664,44 @@ int do_update_va_mapping(unsigned long v
 
     perfc_incrc(calls_to_update_va);
 
-    if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) )
+    if ( unlikely(!__addr_ok(va) && !shadow2_mode_external(d)) )
         return -EINVAL;
 
+    if ( unlikely(shadow2_mode_refcounts(d)) )
+    {
+        DPRINTK("Grant op on a shadow-refcounted domain\n");
+        return -EINVAL; 
+    }
+
     LOCK_BIGLOCK(d);
 
-    if ( unlikely(shadow_mode_enabled(d)) )
-        check_pagetable(v, "pre-va"); /* debug */
-
-    if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
-                                val)) )
-        rc = -EINVAL;
-
-    if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
+    if ( likely(rc == 0) && unlikely(shadow2_mode_enabled(d)) )
     {
         if ( unlikely(this_cpu(percpu_mm_info).foreign &&
-                      (shadow_mode_translate(d) ||
-                       shadow_mode_translate(
+                      (shadow2_mode_translate(d) ||
+                       shadow2_mode_translate(
                            this_cpu(percpu_mm_info).foreign))) )
         {
             /*
              * The foreign domain's pfn's are in a different namespace. There's
-             * not enough information in just a gpte to figure out how to
+             * not enough information in just a gpte to figure out how to   
              * (re-)shadow this entry.
              */
             domain_crash(d);
         }
+    }
+
+    if ( unlikely(!mod_l1_entry(
+                      &linear_pg_table[l1_linear_offset(va)], val,
+                      l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]))) )
+        rc = -EINVAL;
     
-        rc = shadow_do_update_va_mapping(va, val, v);
-
-        check_pagetable(v, "post-va"); /* debug */
-    }
-
     switch ( flags & UVMF_FLUSHTYPE_MASK )
     {
     case UVMF_TLB_FLUSH:
         switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
         {
         case UVMF_LOCAL:
-            if ( unlikely(shadow_mode_enabled(d)) )
-                shadow_sync_all(d);
             local_flush_tlb();
             break;
         case UVMF_ALL:
@@ -2733,9 +2720,9 @@ int do_update_va_mapping(unsigned long v
         switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
         {
         case UVMF_LOCAL:
-            if ( unlikely(shadow_mode_enabled(d)) )
-                shadow_invlpg(current, va);
-            local_flush_tlb_one(va);
+            if ( !shadow2_mode_enabled(d) 
+                 || (shadow2_invlpg(current, va) != 0) ) 
+                local_flush_tlb_one(va);
             break;
         case UVMF_ALL:
             flush_tlb_one_mask(d->domain_dirty_cpumask, va);
@@ -2807,8 +2794,6 @@ long set_gdt(struct vcpu *v,
 
     if ( entries > FIRST_RESERVED_GDT_ENTRY )
         return -EINVAL;
-
-    shadow_sync_all(d);
 
     /* Check the pages in the new GDT. */
     for ( i = 0; i < nr_pages; i++ ) {
@@ -2912,23 +2897,12 @@ long do_update_descriptor(u64 pa, u64 de
         break;
     }
 
-    if ( shadow_mode_enabled(dom) )
-    {
-        shadow_lock(dom);
-
-        __mark_dirty(dom, mfn);
-
-        if ( page_is_page_table(page) && !page_out_of_sync(page) )
-            shadow_mark_mfn_out_of_sync(current, gmfn, mfn);
-    }
+    mark_dirty(dom, mfn);
 
     /* All is good so make the update. */
     gdt_pent = map_domain_page(mfn);
     memcpy(&gdt_pent[offset], &d, 8);
     unmap_domain_page(gdt_pent);
-
-    if ( shadow_mode_enabled(dom) )
-        shadow_unlock(dom);
 
     put_page_type(page);
 
@@ -2981,8 +2955,8 @@ long arch_memory_op(int op, XEN_GUEST_HA
         default:
             break;
         }
-        
-        if ( !shadow_mode_translate(d) || (mfn == 0) )
+
+        if ( !shadow2_mode_translate(d) || (mfn == 0) )
         {
             put_domain(d);
             return -EINVAL;
@@ -3011,7 +2985,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
         guest_physmap_add_page(d, xatp.gpfn, mfn);
 
         UNLOCK_BIGLOCK(d);
-
+        
         put_domain(d);
 
         break;
@@ -3075,56 +3049,6 @@ long arch_memory_op(int op, XEN_GUEST_HA
 /*************************
  * Writable Pagetables
  */
-
-/* Re-validate a given p.t. page, given its prior snapshot */
-int revalidate_l1(
-    struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
-{
-    l1_pgentry_t ol1e, nl1e;
-    int modified = 0, i;
-
-    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
-    {
-        ol1e = snapshot[i];
-        nl1e = l1page[i];
-
-        if ( likely(l1e_get_intpte(ol1e) == l1e_get_intpte(nl1e)) )
-            continue;
-
-        /* Update number of entries modified. */
-        modified++;
-
-        /*
-         * Fast path for PTEs that have merely been write-protected
-         * (e.g., during a Unix fork()). A strict reduction in privilege.
-         */
-        if ( likely(l1e_get_intpte(ol1e) == (l1e_get_intpte(nl1e)|_PAGE_RW)) )
-        {
-            if ( likely(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
-                put_page_type(mfn_to_page(l1e_get_pfn(nl1e)));
-            continue;
-        }
-
-        if ( unlikely(!get_page_from_l1e(nl1e, d)) )
-        {
-            /*
-             * Make the remaining p.t's consistent before crashing, so the
-             * reference counts are correct.
-             */
-            memcpy(&l1page[i], &snapshot[i],
-                   (L1_PAGETABLE_ENTRIES - i) * sizeof(l1_pgentry_t));
-
-            /* Crash the offending domain. */
-            MEM_LOG("ptwr: Could not revalidate l1 page");
-            domain_crash(d);
-            break;
-        }
-        
-        put_page_from_l1e(ol1e, d);
-    }
-
-    return modified;
-}
 
 static int ptwr_emulated_update(
     unsigned long addr,
@@ -3136,7 +3060,8 @@ static int ptwr_emulated_update(
     unsigned long pfn;
     struct page_info *page;
     l1_pgentry_t pte, ol1e, nl1e, *pl1e;
-    struct domain *d = current->domain;
+    struct vcpu *v = current;
+    struct domain *d = v->domain;
 
     /* Aligned access only, thank you. */
     if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) )
@@ -3192,8 +3117,26 @@ static int ptwr_emulated_update(
     nl1e = l1e_from_intpte(val);
     if ( unlikely(!get_page_from_l1e(nl1e, d)) )
     {
-        MEM_LOG("ptwr_emulate: could not get_page_from_l1e()");
-        return X86EMUL_UNHANDLEABLE;
+        if ( (CONFIG_PAGING_LEVELS == 3) &&
+             (bytes == 4) &&
+             !do_cmpxchg &&
+             (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
+        {
+            /*
+             * If this is a half-write to a PAE PTE then we assume that the
+             * guest has simply got the two writes the wrong way round. We
+             * zap the PRESENT bit on the assumption the bottom half will be
+             * written immediately after we return to the guest.
+             */
+            MEM_LOG("ptwr_emulate: fixing up invalid PAE PTE %"PRIpte"\n",
+                    l1e_get_intpte(nl1e));
+            l1e_remove_flags(nl1e, _PAGE_PRESENT);
+        }
+        else
+        {
+            MEM_LOG("ptwr_emulate: could not get_page_from_l1e()");
+            return X86EMUL_UNHANDLEABLE;
+        }
     }
 
     /* Checked successfully: do the update (write or cmpxchg). */
@@ -3201,20 +3144,30 @@ static int ptwr_emulated_update(
     pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
     if ( do_cmpxchg )
     {
+        if ( shadow2_mode_enabled(d) )
+            shadow2_lock(d);
         ol1e = l1e_from_intpte(old);
         if ( cmpxchg((intpte_t *)pl1e, old, val) != old )
         {
+            if ( shadow2_mode_enabled(d) )
+                shadow2_unlock(d);
             unmap_domain_page(pl1e);
             put_page_from_l1e(nl1e, d);
             return X86EMUL_CMPXCHG_FAILED;
         }
+        if ( unlikely(shadow2_mode_enabled(v->domain)) )
+        {
+            shadow2_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
+            shadow2_unlock(v->domain);    
+        }
     }
     else
     {
         ol1e = *pl1e;
-        if ( !update_l1e(pl1e, ol1e, nl1e) )
+        if ( !update_l1e(pl1e, ol1e, nl1e, page_to_mfn(page), v) )
             BUG();
     }
+
     unmap_domain_page(pl1e);
 
     /* Finally, drop the old PTE. */
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/mpparse.c
--- a/xen/arch/x86/mpparse.c    Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/mpparse.c    Sun Aug 20 11:08:45 2006 -0400
@@ -107,7 +107,7 @@ static int __init mpf_checksum(unsigned 
  * doing this ....
  */
 
-static int mpc_record; 
+static int mpc_record;
 static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] 
__initdata;
 
 #ifdef CONFIG_X86_NUMAQ
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/nmi.c
--- a/xen/arch/x86/nmi.c        Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/nmi.c        Sun Aug 20 11:08:45 2006 -0400
@@ -6,11 +6,11 @@
  *  Started by Ingo Molnar <mingo@xxxxxxxxxx>
  *
  *  Fixes:
- *  Mikael Pettersson  : AMD K7 support for local APIC NMI watchdog.
- *  Mikael Pettersson  : Power Management for local APIC NMI watchdog.
- *  Mikael Pettersson  : Pentium 4 support for local APIC NMI watchdog.
+ *  Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
+ *  Mikael Pettersson : Power Management for local APIC NMI watchdog.
+ *  Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
  *  Pavel Machek and
- *  Mikael Pettersson  : PM converted to driver model. Disable/enable API.
+ *  Mikael Pettersson : PM converted to driver model. Disable/enable API.
  */
 
 #include <xen/config.h>
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c   Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/oprofile/nmi_int.c   Sun Aug 20 11:08:45 2006 -0400
@@ -59,7 +59,7 @@ static void nmi_cpu_save_registers(struc
 static void nmi_cpu_save_registers(struct op_msrs *msrs)
 {
        unsigned int const nr_ctrs = model->num_counters;
-       unsigned int const nr_ctrls = model->num_controls; 
+       unsigned int const nr_ctrls = model->num_controls;
        struct op_msr *counters = msrs->counters;
        struct op_msr *controls = msrs->controls;
        unsigned int i;
@@ -180,7 +180,7 @@ static void nmi_restore_registers(struct
 static void nmi_restore_registers(struct op_msrs * msrs)
 {
        unsigned int const nr_ctrs = model->num_counters;
-       unsigned int const nr_ctrls = model->num_controls; 
+       unsigned int const nr_ctrls = model->num_controls;
        struct op_msr * counters = msrs->counters;
        struct op_msr * controls = msrs->controls;
        unsigned int i;
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/oprofile/op_model_p4.c
--- a/xen/arch/x86/oprofile/op_model_p4.c       Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/oprofile/op_model_p4.c       Sun Aug 20 11:08:45 2006 -0400
@@ -401,7 +401,7 @@ static unsigned long reset_value[NUM_COU
 
 static void p4_fill_in_addresses(struct op_msrs * const msrs)
 {
-       unsigned int i; 
+       unsigned int i;
        unsigned int addr, stag;
 
        setup_num_counters();
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/oprofile/xenoprof.c
--- a/xen/arch/x86/oprofile/xenoprof.c  Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/oprofile/xenoprof.c  Sun Aug 20 11:08:45 2006 -0400
@@ -255,7 +255,7 @@ int reset_active(struct domain *d)
     active_ready[ind] = 0;
     active_domains[ind] = NULL;
     activated--;
-    put_domain(d); 
+    put_domain(d);
 
     if ( activated <= 0 )
         adomains = 0;
@@ -316,7 +316,7 @@ int add_active_list (domid_t domid)
     if ( adomains >= MAX_OPROF_DOMAINS )
         return -E2BIG;
 
-    d = find_domain_by_id(domid); 
+    d = find_domain_by_id(domid);
     if ( d == NULL )
         return -EINVAL;
 
@@ -339,7 +339,7 @@ int add_passive_list(XEN_GUEST_HANDLE(vo
     if ( copy_from_guest(&passive, arg, 1) )
         return -EFAULT;
 
-    d = find_domain_by_id(passive.domain_id); 
+    d = find_domain_by_id(passive.domain_id);
     if ( d == NULL )
         return -EINVAL;
 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/setup.c      Sun Aug 20 11:08:45 2006 -0400
@@ -44,7 +44,7 @@ boolean_param("nosmp", opt_nosmp);
 
 /* maxcpus: maximum number of CPUs to activate. */
 static unsigned int max_cpus = NR_CPUS;
-integer_param("maxcpus", max_cpus); 
+integer_param("maxcpus", max_cpus);
 
 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
 static int opt_watchdog = 0;
@@ -532,8 +532,6 @@ void __init __start_xen(multiboot_info_t
     if ( opt_watchdog ) 
         watchdog_enable();
 
-    shadow_mode_init();
-
     /* initialize access control security module */
     acm_init(&initrdidx, mbi, initial_images_start);
 
diff -r 96d6f9cfed6e -r 4cffec02b478 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Sun Aug 20 11:07:52 2006 -0400
+++ b/xen/arch/x86/smpboot.c    Sun Aug 20 11:08:45 2006 -0400
@@ -896,7 +896,7 @@ static int __devinit do_boot_cpu(int api
        v = alloc_idle_vcpu(cpu);
        BUG_ON(v == NULL);
 
-       v->arch.monitor_table = pagetable_from_paddr(__pa(idle_pg_table));
+       v->arch.cr3 = __pa(idle_pg_table);
 
        /* start_eip had better be page-aligned! */

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel