[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] [HVM] Add type information to the p2m map.



# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Date 1189431750 -3600
# Node ID 4633e9604da9c51f077285465d63db1820e6f574
# Parent  1474db8058b20753eb465273f7dbf5e10662bf0f
[HVM] Add type information to the p2m map.
This is a base for memory tricks like page sharing, copy-on-write, lazy
allocation etc.  It should also make pass-through MMIO easier to
implement in the p2m.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
---
 xen/arch/x86/hvm/hvm.c           |   33 ++++---
 xen/arch/x86/hvm/io.c            |    9 -
 xen/arch/x86/hvm/svm/svm.c       |   32 ++++--
 xen/arch/x86/hvm/vmx/vmx.c       |   18 ++-
 xen/arch/x86/mm/hap/guest_walk.c |   10 +-
 xen/arch/x86/mm/hap/hap.c        |   10 +-
 xen/arch/x86/mm/p2m.c            |  122 +++++++++++++++++---------
 xen/arch/x86/mm/shadow/common.c  |   24 +++--
 xen/arch/x86/mm/shadow/multi.c   |  138 +++++++++++++++++++-----------
 xen/arch/x86/mm/shadow/types.h   |    2 
 xen/include/asm-x86/mm.h         |    2 
 xen/include/asm-x86/p2m.h        |  179 ++++++++++++++++++++++++++++-----------
 12 files changed, 379 insertions(+), 200 deletions(-)

diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Mon Sep 10 14:42:30 2007 +0100
@@ -161,12 +161,14 @@ static int hvm_set_ioreq_page(
     struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
 {
     struct page_info *page;
+    p2m_type_t p2mt;
     unsigned long mfn;
     void *va;
 
-    mfn = gmfn_to_mfn(d, gmfn);
-    if ( !mfn_valid(mfn) )
+    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+    if ( !p2m_is_ram(p2mt) )
         return -EINVAL;
+    ASSERT(mfn_valid(mfn));
 
     page = mfn_to_page(mfn);
     if ( !get_page_and_type(page, d, PGT_writable_page) )
@@ -517,7 +519,8 @@ int hvm_set_cr0(unsigned long value)
 int hvm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
-    unsigned long mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
+    p2m_type_t p2mt;
+    unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
   
     HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
 
@@ -559,8 +562,10 @@ int hvm_set_cr0(unsigned long value)
         if ( !paging_mode_hap(v->domain) )
         {
             /* The guest CR3 must be pointing to the guest physical. */
-            mfn = get_mfn_from_gpfn(v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT);
-            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
+            gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
+            mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+            if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) || 
+                 !get_page(mfn_to_page(mfn), v->domain))
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", 
                          v->arch.hvm_vcpu.guest_cr[3], mfn);
@@ -603,16 +608,18 @@ int hvm_set_cr3(unsigned long value)
 int hvm_set_cr3(unsigned long value)
 {
     unsigned long mfn;
+    p2m_type_t p2mt;
     struct vcpu *v = current;
 
     if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
          (value != v->arch.hvm_vcpu.guest_cr[3]) )
     {
-        /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */
+        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
-        mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
-            goto bad_cr3;
+        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
+             !get_page(mfn_to_page(mfn), v->domain) )
+              goto bad_cr3;
 
         put_page(pagetable_get_page(v->arch.guest_table));
         v->arch.guest_table = pagetable_from_pfn(mfn);
@@ -677,6 +684,7 @@ static int __hvm_copy(void *buf, paddr_t
 static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
 {
     unsigned long gfn, mfn;
+    p2m_type_t p2mt;
     char *p;
     int count, todo;
 
@@ -690,10 +698,11 @@ static int __hvm_copy(void *buf, paddr_t
         else
             gfn = addr >> PAGE_SHIFT;
         
-        mfn = get_mfn_from_gpfn(gfn);
-
-        if ( mfn == INVALID_MFN )
+        mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+
+        if ( !p2m_is_ram(p2mt) )
             return todo;
+        ASSERT(mfn_valid(mfn));
 
         p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
 
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/io.c     Mon Sep 10 14:42:30 2007 +0100
@@ -826,9 +826,7 @@ void hvm_io_assist(void)
     ioreq_t *p;
     struct cpu_user_regs *regs;
     struct hvm_io_op *io_opp;
-    unsigned long gmfn;
     struct vcpu *v = current;
-    struct domain *d = v->domain;
 
     io_opp = &v->arch.hvm_vcpu.io_op;
     regs   = &io_opp->io_context;
@@ -861,13 +859,6 @@ void hvm_io_assist(void)
     regs->eflags &= ~X86_EFLAGS_RF;
     hvm_load_cpu_guest_regs(v, regs);
     memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
-
-    /* Has memory been dirtied? */
-    if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
-    {
-        gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
-        paging_mark_dirty(d, gmfn);
-    }
 
  out:
     vcpu_end_shutdown_deferral(v);
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Sep 10 14:42:30 2007 +0100
@@ -338,6 +338,7 @@ int svm_vmcb_restore(struct vcpu *v, str
 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
     if ( c->pending_valid &&
@@ -353,8 +354,8 @@ int svm_vmcb_restore(struct vcpu *v, str
     {
         if ( c->cr0 & X86_CR0_PG )
         {
-            mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
                          c->cr3);
@@ -1004,15 +1005,23 @@ int start_svm(struct cpuinfo_x86 *c)
     return 1;
 }
 
-static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
-{
-    if (mmio_space(gpa)) {
+static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
+{
+    p2m_type_t p2mt;
+    mfn_t mfn;
+    unsigned long gfn = gpa >> PAGE_SHIFT;
+
+    /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
+    mfn = gfn_to_mfn_current(gfn, &p2mt);
+    if ( p2mt == p2m_mmio_dm )
+    {
         handle_mmio(gpa);
-        return 1;
-    }
-
-    paging_mark_dirty(current->domain, get_mfn_from_gpfn(gpa >> PAGE_SHIFT));
-    return p2m_set_flags(current->domain, gpa, __PAGE_HYPERVISOR|_PAGE_USER);
+        return;
+    }
+
+    /* Log-dirty: mark the page dirty and let the guest write it again */
+    paging_mark_dirty(current->domain, mfn_x(mfn));
+    p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
 }
 
 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
@@ -2341,8 +2350,7 @@ asmlinkage void svm_vmexit_handler(struc
 
     case VMEXIT_NPF:
         regs->error_code = vmcb->exitinfo1;
-        if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) )
-            domain_crash(v->domain);
+        svm_do_nested_pgfault(vmcb->exitinfo2, regs);
         break;
 
     default:
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Sep 10 14:42:30 2007 +0100
@@ -566,6 +566,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
 int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
 
     if ( c->pending_valid &&
          ((c->pending_type == 1) || (c->pending_type > 6) ||
@@ -578,8 +579,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
 
     if ( c->cr0 & X86_CR0_PG )
     {
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
         {
             gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
             return -EINVAL;
@@ -1292,19 +1293,23 @@ static void vmx_do_cpuid(struct cpu_user
          * Note that this leaf lives at <max-hypervisor-leaf> + 1.
          */
         u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
-        unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
+        p2m_type_t p2mt;
+        unsigned long mfn;
         struct vcpu *v = current;
         char *p;
 
+        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+
         gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
 
         /* 8-byte aligned valid pseudophys address from vmxassist, please. */
-        if ( (value & 7) || (mfn == INVALID_MFN) ||
+        if ( (value & 7) || !p2m_is_ram(p2mt) ||
              !v->arch.hvm_vmx.vmxassist_enabled )
         {
             domain_crash(v->domain);
             return;
         }
+        ASSERT(mfn_valid(mfn));
 
         p = map_domain_page(mfn);
         value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
@@ -1905,11 +1910,12 @@ static int vmx_world_restore(struct vcpu
 static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
 
     if ( c->cr0 & X86_CR0_PG )
     {
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
         {
             gdprintk(XENLOG_ERR, "Invalid CR3 value=%x", c->cr3);
             return -EINVAL;
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c  Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/hap/guest_walk.c  Mon Sep 10 14:42:30 2007 +0100
@@ -28,7 +28,8 @@
 #include <xen/sched.h>
 #include <asm/hvm/svm/vmcb.h>
 #include <asm/domain.h>
-#include <asm/shadow.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
 #include <asm/hap.h>
 
 #include "private.h"
@@ -67,6 +68,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     int lev, index;
     paddr_t gpa = 0;
     unsigned long gpfn, mfn;
+    p2m_type_t p2mt;
     int success = 1;
 
     l1_pgentry_t *l1e;
@@ -81,14 +83,16 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     gpfn = (gcr3 >> PAGE_SHIFT);
     for ( lev = mode; lev >= 1; lev-- )
     {
-        mfn = get_mfn_from_gpfn(gpfn);
-        if ( mfn == INVALID_MFN )
+        mfn = mfn_x(gfn_to_mfn_current(gpfn, &p2mt));
+        if ( !p2m_is_ram(p2mt) )
         {
             HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
                        lev);
             success = 0;
             break;
         }
+        ASSERT(mfn_valid(mfn));
+
         index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
 
 #if GUEST_PAGING_LEVELS >= 4
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/hap/hap.c Mon Sep 10 14:42:30 2007 +0100
@@ -60,8 +60,8 @@ int hap_enable_log_dirty(struct domain *
     d->arch.paging.mode |= PG_log_dirty;
     hap_unlock(d);
 
-    /* set l1e entries of P2M table to NOT_WRITABLE. */
-    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    /* set l1e entries of P2M table to be read-only. */
+    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
     return 0;
 }
@@ -73,14 +73,14 @@ int hap_disable_log_dirty(struct domain 
     hap_unlock(d);
 
     /* set l1e entries of P2M table with normal mode */
-    p2m_set_flags_global(d, __PAGE_HYPERVISOR|_PAGE_USER);
+    p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
     return 0;
 }
 
 void hap_clean_dirty_bitmap(struct domain *d)
 {
-    /* mark physical memory as NOT_WRITEABLE and flush the TLB */
-    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    /* set l1e entries of P2M table to be read-only. */
+    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
 }
 
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/p2m.c     Mon Sep 10 14:42:30 2007 +0100
@@ -4,7 +4,7 @@
  * physical-to-machine mappings for automatically-translated domains.
  *
  * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
  *
@@ -93,6 +93,31 @@
 #define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
 
 
+/* PTE flags for the various types of p2m entry */
+#define P2M_BASE_FLAGS \
+        (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+static unsigned long p2m_type_to_flags(p2m_type_t t) 
+{
+    unsigned long flags = (t & 0x7UL) << 9;
+    switch(t)
+    {
+    case p2m_invalid:
+    default:
+        return flags;
+    case p2m_ram_rw:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW;
+    case p2m_ram_logdirty:
+        return flags | P2M_BASE_FLAGS;
+    case p2m_ram_ro:
+        return flags | P2M_BASE_FLAGS;
+    case p2m_mmio_dm:
+        return flags;
+    case p2m_mmio_direct:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
+    }
+}
+
 
 // Find the next level's P2M entry, checking for out-of-range gfn's...
 // Returns NULL on error.
@@ -358,19 +383,25 @@ void p2m_teardown(struct domain *d)
 }
 
 mfn_t
-gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
 /* Read another domain's p2m entries */
 {
     mfn_t mfn;
-    paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT;
+    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
     l2_pgentry_t *l2e;
     l1_pgentry_t *l1e;
 
     ASSERT(paging_mode_translate(d));
+
+    /* XXX This is for compatibility with the old model, where anything not 
+     * XXX marked as RAM was considered to be emulated MMIO space.
+     * XXX Once we start explicitly registering MMIO regions in the p2m 
+     * XXX we will return p2m_invalid for unmapped gfns */
+    *t = p2m_mmio_dm;
+
     mfn = pagetable_get_mfn(d->arch.phys_table);
 
-
-    if ( gpfn > d->arch.p2m.max_mapped_pfn )
+    if ( gfn > d->arch.p2m.max_mapped_pfn )
         /* This pfn is higher than the highest the p2m map currently holds */
         return _mfn(INVALID_MFN);
 
@@ -428,9 +459,11 @@ gfn_to_mfn_foreign(struct domain *d, uns
         return _mfn(INVALID_MFN);
     }
     mfn = _mfn(l1e_get_pfn(*l1e));
+    *t = p2m_flags_to_type(l1e_get_flags(*l1e));
     unmap_domain_page(l1e);
 
-    return mfn;
+    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+    return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
 }
 
 #if P2M_AUDIT
@@ -630,10 +663,7 @@ p2m_remove_page(struct domain *d, unsign
         return;
     P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
-    ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
-    //ASSERT(mfn_to_gfn(d, mfn) == gfn);
-
-    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
     set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
 }
 
@@ -653,6 +683,7 @@ guest_physmap_add_page(struct domain *d,
                        unsigned long mfn)
 {
     unsigned long ogfn;
+    p2m_type_t ot;
     mfn_t omfn;
 
     if ( !paging_mode_translate(d) )
@@ -663,10 +694,10 @@ guest_physmap_add_page(struct domain *d,
 
     P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
-    omfn = gfn_to_mfn(d, gfn);
-    if ( mfn_valid(omfn) )
-    {
-        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+    omfn = gfn_to_mfn(d, gfn, &ot);
+    if ( p2m_is_ram(ot) )
+    {
+        ASSERT(mfn_valid(omfn));
         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
     }
 
@@ -683,8 +714,10 @@ guest_physmap_add_page(struct domain *d,
         /* This machine frame is already mapped at another physical address */
         P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                   mfn, ogfn, gfn);
-        if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) )
-        {
+        omfn = gfn_to_mfn(d, ogfn, &ot);
+        if ( p2m_is_ram(ot) )
+        {
+            ASSERT(mfn_valid(omfn));
             P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
                       ogfn , mfn_x(omfn));
             if ( mfn_x(omfn) == mfn )
@@ -692,21 +725,29 @@ guest_physmap_add_page(struct domain *d,
         }
     }
 
-    set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
-    set_gpfn_from_mfn(mfn, gfn);
+    if ( mfn_valid(_mfn(mfn)) ) 
+    {
+        set_p2m_entry(d, gfn, _mfn(mfn),
+                  p2m_type_to_flags(p2m_ram_rw)|__PAGE_HYPERVISOR|_PAGE_USER);
+        set_gpfn_from_mfn(mfn, gfn);
+    }
+    else
+    {
+        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
+                 gfn, mfn);
+        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+    }
 
     audit_p2m(d);
     p2m_unlock(d);
 }
 
-/* This function goes through P2M table and modify l1e flags of all pages. Note
- * that physical base address of l1e is intact. This function can be used for
- * special purpose, such as marking physical memory as NOT WRITABLE for
- * tracking dirty pages during live migration.
- */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags)
-{
-    unsigned long mfn, gfn;
+/* Walk the whole p2m table, changing any entries of the old type
+ * to the new type.  This is used in hardware-assisted paging to 
+ * quickly enable or disable log-dirty tracking */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
+{
+    unsigned long mfn, gfn, flags;
     l1_pgentry_t l1e_content;
     l1_pgentry_t *l1e;
     l2_pgentry_t *l2e;
@@ -769,12 +810,14 @@ void p2m_set_flags_global(struct domain 
 
                 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                 {
-                    if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+                    flags = l1e_get_flags(l1e[i1]);
+                    if ( p2m_flags_to_type(flags) != ot )
                         continue;
                     mfn = l1e_get_pfn(l1e[i1]);
                     gfn = get_gpfn_from_mfn(mfn);
-                    /* create a new 1le entry using l1e_flags */
-                    l1e_content = l1e_from_pfn(mfn, l1e_flags);
+                    /* create a new l1e entry with the new type */
+                    flags = p2m_flags_to_type(nt);
+                    l1e_content = l1e_from_pfn(mfn, flags);
                     paging_write_p2m_entry(d, gfn, &l1e[i1],
                                            l1mfn, l1e_content, 1);
                 }
@@ -800,24 +843,23 @@ void p2m_set_flags_global(struct domain 
     p2m_unlock(d);
 }
 
-/* This function traces through P2M table and modifies l1e flags of a specific
- * gpa.
- */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
-{
-    unsigned long gfn;
+/* Modify the p2m type of a single gfn from ot to nt, returning the 
+ * entry's previous type */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn, 
+                           p2m_type_t ot, p2m_type_t nt)
+{
+    p2m_type_t pt;
     mfn_t mfn;
 
     p2m_lock(d);
 
-    gfn = gpa >> PAGE_SHIFT;
-    mfn = gfn_to_mfn(d, gfn);
-    if ( mfn_valid(mfn) )
-        set_p2m_entry(d, gfn, mfn, l1e_flags);
+    mfn = gfn_to_mfn(d, gfn, &pt);
+    if ( pt == ot )
+        set_p2m_entry(d, gfn, mfn, p2m_type_to_flags(nt));
 
     p2m_unlock(d);
 
-    return 1;
+    return pt;
 }
 
 /*
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Mon Sep 10 14:42:30 2007 +0100
@@ -2764,19 +2764,23 @@ shadow_write_p2m_entry(struct vcpu *v, u
                        l1_pgentry_t new, unsigned int level)
 {
     struct domain *d = v->domain;
-    mfn_t mfn;
     
     shadow_lock(d);
 
-    /* handle physmap_add and physmap_remove */
-    mfn = gfn_to_mfn(d, gfn);
-    if ( v != NULL && level == 1 && mfn_valid(mfn) ) {
-        sh_remove_all_shadows_and_parents(v, mfn);
-        if ( sh_remove_all_mappings(v, mfn) )
-            flush_tlb_mask(d->domain_dirty_cpumask);    
-    }
-    
-    /* update the entry with new content */
+    /* If we're removing an MFN from the p2m, remove it from the shadows too */
+    if ( level == 1 )
+    {
+        mfn_t mfn = _mfn(l1e_get_pfn(*p));
+        p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
+        if ( p2m_is_valid(p2mt) && mfn_valid(mfn) ) 
+        {
+            sh_remove_all_shadows_and_parents(v, mfn);
+            if ( sh_remove_all_mappings(v, mfn) )
+                flush_tlb_mask(d->domain_dirty_cpumask);    
+        }
+    }
+
+    /* Update the entry with new content */
     safe_write_pte(p, new);
 
     /* install P2M in monitors for PAE Xen */
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Mon Sep 10 14:42:30 2007 +0100
@@ -209,6 +209,7 @@ guest_walk_tables(struct vcpu *v, unsign
 guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
 {
     struct domain *d = v->domain;
+    p2m_type_t p2mt;
     ASSERT(!guest_op || shadow_locked_by_me(d));
     
     perfc_incr(shadow_guest_walk);
@@ -223,8 +224,9 @@ guest_walk_tables(struct vcpu *v, unsign
         + guest_l4_table_offset(va);
     /* Walk down to the l3e */
     if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
-    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e));
-    if ( !mfn_valid(gw->l3mfn) ) return 1;
+    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
+    if ( !p2m_is_ram(p2mt) ) return 1;
+    ASSERT(mfn_valid(gw->l3mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
     if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask); 
@@ -236,8 +238,9 @@ guest_walk_tables(struct vcpu *v, unsign
 #endif /* PAE or 64... */
     /* Walk down to the l2e */
     if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
-    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e));
-    if ( !mfn_valid(gw->l2mfn) ) return 1;
+    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
+    if ( !p2m_is_ram(p2mt) ) return 1;
+    ASSERT(mfn_valid(gw->l2mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
     if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask); 
@@ -278,8 +281,9 @@ guest_walk_tables(struct vcpu *v, unsign
     else 
     {
         /* Not a superpage: carry on and find the l1e. */
-        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e));
-        if ( !mfn_valid(gw->l1mfn) ) return 1;
+        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
+        if ( !p2m_is_ram(p2mt) ) return 1;
+        ASSERT(mfn_valid(gw->l1mfn));
         /* This mfn is a pagetable: make sure the guest can't write to it. */
         if ( guest_op 
              && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
@@ -626,7 +630,7 @@ _sh_propagate(struct vcpu *v,
               void *shadow_entry_ptr,
               int level,
               fetch_type_t ft, 
-              int mmio)
+              p2m_type_t p2mt)
 {
     guest_l1e_t *gp = guest_entry_ptr;
     shadow_l1e_t *sp = shadow_entry_ptr;
@@ -636,6 +640,13 @@ _sh_propagate(struct vcpu *v,
 
     /* We don't shadow PAE l3s */
     ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
+
+    /* Check there's something for the shadows to map to */
+    if ( !p2m_is_valid(p2mt) )
+    {
+        *sp = shadow_l1e_empty();
+        goto done;
+    }
 
     if ( mfn_valid(guest_table_mfn) )
         /* Handle A and D bit propagation into the guest */
@@ -658,19 +669,22 @@ _sh_propagate(struct vcpu *v,
         goto done;
     }
 
-    if ( level == 1 && mmio )
-    {
-        /* Guest l1e maps MMIO space */
+    if ( level == 1 && p2mt == p2m_mmio_dm )
+    {
+        /* Guest l1e maps emulated MMIO space */
         *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
         if ( !d->arch.paging.shadow.has_fast_mmio_entries )
             d->arch.paging.shadow.has_fast_mmio_entries = 1;
         goto done;
     }
 
-    // Must have a valid target_mfn, unless this is a prefetch.  In the
+    // Must have a valid target_mfn unless this is a prefetch.  In the
     // case of a prefetch, an invalid mfn means that we can not usefully
     // shadow anything, and so we return early.
     //
+    /* N.B. For pass-through MMIO, either this test needs to be relaxed,
+     * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
+     * MMIO areas need to be added to the frame-table to make them "valid". */
     if ( !mfn_valid(target_mfn) )
     {
         ASSERT((ft == ft_prefetch));
@@ -718,6 +732,8 @@ _sh_propagate(struct vcpu *v,
     // Only allow the guest write access to a page a) on a demand fault,
     // or b) if the page is already marked as dirty.
     //
+    // (We handle log-dirty entirely inside the shadow code, without using the 
+    // p2m_ram_logdirty p2m type: only HAP uses that.)
     if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
     {
         if ( ft & FETCH_TYPE_WRITE ) 
@@ -725,6 +741,10 @@ _sh_propagate(struct vcpu *v,
         else if ( !sh_mfn_is_dirty(d, target_mfn) )
             sflags &= ~_PAGE_RW;
     }
+
+    /* Read-only memory */
+    if ( p2mt == p2m_ram_ro ) 
+        sflags &= ~_PAGE_RW;
     
     // protect guest page tables
     //
@@ -754,7 +774,12 @@ _sh_propagate(struct vcpu *v,
         sflags |= _PAGE_USER;
     }
 
+    /* MMIO addresses should never be cached */
+    if ( p2m_is_mmio(p2mt) )
+        sflags |= _PAGE_PCD;
+
     *sp = shadow_l1e_from_mfn(target_mfn, sflags);
+
  done:
     SHADOW_DEBUG(PROPAGATE,
                  "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
@@ -775,7 +800,7 @@ l4e_propagate_from_guest(struct vcpu *v,
                          shadow_l4e_t *sl4e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, 0);
+    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
 }
 
 static void
@@ -786,7 +811,7 @@ l3e_propagate_from_guest(struct vcpu *v,
                          shadow_l3e_t *sl3e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, 0);
+    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
 }
 #endif // GUEST_PAGING_LEVELS >= 4
 
@@ -798,7 +823,7 @@ l2e_propagate_from_guest(struct vcpu *v,
                          shadow_l2e_t *sl2e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, 0);
+    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
 }
 
 static void
@@ -808,9 +833,9 @@ l1e_propagate_from_guest(struct vcpu *v,
                          mfn_t gmfn, 
                          shadow_l1e_t *sl1e,
                          fetch_type_t ft, 
-                         int mmio)
-{
-    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, mmio);
+                         p2m_type_t p2mt)
+{
+    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
 }
 
 
@@ -2196,6 +2221,7 @@ static int validate_gl4e(struct vcpu *v,
     shadow_l4e_t *sl4p = se;
     mfn_t sl3mfn = _mfn(INVALID_MFN);
     struct domain *d = v->domain;
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl4e_calls);
@@ -2203,8 +2229,8 @@ static int validate_gl4e(struct vcpu *v,
     if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
     {
         gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
-        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn);
-        if ( mfn_valid(gl3mfn) )
+        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
+        if ( p2m_is_ram(p2mt) )
             sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
         else
             result |= SHADOW_SET_ERROR;
@@ -2248,6 +2274,7 @@ static int validate_gl3e(struct vcpu *v,
     guest_l3e_t *new_gl3e = new_ge;
     shadow_l3e_t *sl3p = se;
     mfn_t sl2mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl3e_calls);
@@ -2255,8 +2282,8 @@ static int validate_gl3e(struct vcpu *v,
     if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
     {
         gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
-        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn);
-        if ( mfn_valid(gl2mfn) )
+        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
+        if ( p2m_is_ram(p2mt) )
             sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
         else
             result |= SHADOW_SET_ERROR;
@@ -2275,6 +2302,7 @@ static int validate_gl2e(struct vcpu *v,
     guest_l2e_t *new_gl2e = new_ge;
     shadow_l2e_t *sl2p = se;
     mfn_t sl1mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl2e_calls);
@@ -2299,8 +2327,8 @@ static int validate_gl2e(struct vcpu *v,
         }
         else
         {
-            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn);
-            if ( mfn_valid(gl1mfn) )
+            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
+            if ( p2m_is_ram(p2mt) )
                 sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
             else
                 result |= SHADOW_SET_ERROR;
@@ -2361,16 +2389,16 @@ static int validate_gl1e(struct vcpu *v,
     shadow_l1e_t *sl1p = se;
     gfn_t gfn;
     mfn_t gmfn;
-    int result = 0, mmio;
+    p2m_type_t p2mt;
+    int result = 0;
 
     perfc_incr(shadow_validate_gl1e_calls);
 
     gfn = guest_l1e_get_gfn(*new_gl1e);
-    gmfn = gfn_to_mfn(v->domain, gfn);
-
-    mmio = (is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)));
+    gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+
     l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 
-                             ft_prefetch, mmio);
+                             ft_prefetch, p2mt);
     
     result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
     return result;
@@ -2554,12 +2582,13 @@ static void sh_prefetch(struct vcpu *v, 
 static void sh_prefetch(struct vcpu *v, walk_t *gw, 
                         shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
 {
-    int i, dist, mmio;
+    int i, dist;
     gfn_t gfn;
     mfn_t gmfn;
     guest_l1e_t gl1e;
     shadow_l1e_t sl1e;
     u32 gflags;
+    p2m_type_t p2mt;
 
     /* Prefetch no further than the end of the _shadow_ l1 MFN */
     dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
@@ -2597,14 +2626,13 @@ static void sh_prefetch(struct vcpu *v, 
 
         /* Look at the gfn that the l1e is pointing at */
         gfn = guest_l1e_get_gfn(gl1e);
-        gmfn = gfn_to_mfn(v->domain, gfn);
-        mmio = ( is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)) );
+        gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
 
         /* Propagate the entry.  Safe to use a pointer to our local 
          * gl1e, since this is not a demand-fetch so there will be no 
          * write-back to the guest. */
         l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
-                                 gmfn, &sl1e, ft_prefetch, mmio);
+                                 gmfn, &sl1e, ft_prefetch, p2mt);
         (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
     }
 }
@@ -2633,8 +2661,9 @@ static int sh_page_fault(struct vcpu *v,
     paddr_t gpa;
     struct sh_emulate_ctxt emul_ctxt;
     struct x86_emulate_ops *emul_ops;
-    int r, mmio;
+    int r;
     fetch_type_t ft = 0;
+    p2m_type_t p2mt;
 
     SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
                    v->domain->domain_id, v->vcpu_id, va, regs->error_code);
@@ -2787,10 +2816,9 @@ static int sh_page_fault(struct vcpu *v,
 
     /* What mfn is the guest trying to access? */
     gfn = guest_l1e_get_gfn(gw.eff_l1e);
-    gmfn = gfn_to_mfn(d, gfn);
-    mmio = (is_hvm_domain(d) && mmio_space(gfn_to_paddr(gfn)));
-
-    if ( !mmio && !mfn_valid(gmfn) )
+    gmfn = gfn_to_mfn(d, gfn, &p2mt);
+
+    if ( !p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn)) )
     {
         perfc_incr(shadow_fault_bail_bad_gfn);
         SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", 
@@ -2821,7 +2849,7 @@ static int sh_page_fault(struct vcpu *v,
 
     /* Calculate the shadow entry and write it */
     l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 
-                             gmfn, &sl1e, ft, mmio);
+                             gmfn, &sl1e, ft, p2mt);
     r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
 
 #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
@@ -2844,7 +2872,10 @@ static int sh_page_fault(struct vcpu *v,
         }
     }
 
-    if ( mmio ) 
+    /* Need to hand off device-model MMIO and writes to read-only
+     * memory to the device model */
+    if ( p2mt == p2m_mmio_dm 
+         || (p2mt == p2m_ram_ro && ft == ft_demand_write) ) 
     {
         gpa = guest_walk_to_gpa(&gw);
         goto mmio;
@@ -3598,6 +3629,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
         int flush = 0;
         gfn_t gl2gfn;
         mfn_t gl2mfn;
+        p2m_type_t p2mt;
         guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e;
         /* First, make all four entries read-only. */
         for ( i = 0; i < 4; i++ )
@@ -3605,8 +3637,9 @@ sh_update_cr3(struct vcpu *v, int do_loc
             if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
             {
                 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                gl2mfn = gfn_to_mfn(d, gl2gfn);
-                flush |= sh_remove_write_access(v, gl2mfn, 2, 0); 
+                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+                if ( p2m_is_ram(p2mt) )
+                    flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
             }
         }
         if ( flush ) 
@@ -3617,13 +3650,15 @@ sh_update_cr3(struct vcpu *v, int do_loc
             if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
             {
                 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                gl2mfn = gfn_to_mfn(d, gl2gfn);
-                sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
-                                       ? SH_type_l2h_shadow 
-                                       : SH_type_l2_shadow);
+                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+                if ( p2m_is_ram(p2mt) )
+                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
+                                           ? SH_type_l2h_shadow 
+                                           : SH_type_l2_shadow);
+                else
+                    sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
             }
             else
-                /* The guest is not present: clear out the shadow. */
                 sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
         }
     }
@@ -3932,6 +3967,7 @@ static inline void * emulate_map_dest(st
     u32 flags, errcode;
     gfn_t gfn;
     mfn_t mfn;
+    p2m_type_t p2mt;
 
     /* We don't emulate user-mode writes to page tables */
     if ( ring_3(sh_ctxt->ctxt.regs) ) 
@@ -3971,7 +4007,6 @@ static inline void * emulate_map_dest(st
         }
     }
 #endif
-    mfn = gfn_to_mfn(v->domain, gfn);
 
     errcode = PFEC_write_access;
     if ( !(flags & _PAGE_PRESENT) ) 
@@ -3981,8 +4016,10 @@ static inline void * emulate_map_dest(st
     if ( !(flags & _PAGE_RW) ) 
         goto page_fault;
 
-    if ( mfn_valid(mfn) )
-    {
+    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+    if ( p2m_is_ram(p2mt) )
+    {
+        ASSERT(mfn_valid(mfn));
         *mfnp = mfn;
         v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
         return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
@@ -4231,6 +4268,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
 /* Convert this gfn to an mfn in the manner appropriate for the
  * guest pagetable it's used in (gmfn) */ 
 {
+    p2m_type_t p2mt;
     if ( !shadow_mode_translate(v->domain) )
         return _mfn(gfn_x(gfn));
     
@@ -4238,7 +4276,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
          != PGT_writable_page ) 
         return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
     else 
-        return gfn_to_mfn(v->domain, gfn);
+        return gfn_to_mfn(v->domain, gfn, &p2mt);
 } 
 
 
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h    Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/types.h    Mon Sep 10 14:42:30 2007 +0100
@@ -414,7 +414,7 @@ gfn_to_paddr(gfn_t gfn)
 
 /* Override gfn_to_mfn to work with gfn_t */
 #undef gfn_to_mfn
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), gfn_x(g))
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), gfn_x(g), (t))
 
 
 /* Type used for recording a walk through guest pagetables.  It is
diff -r 1474db8058b2 -r 4633e9604da9 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/include/asm-x86/mm.h  Mon Sep 10 14:42:30 2007 +0100
@@ -328,8 +328,6 @@ TYPE_SAFE(unsigned long,mfn);
       ? get_gpfn_from_mfn(mfn)                          \
       : (mfn) )
 
-#define gmfn_to_mfn(_d, gpfn)  mfn_x(gfn_to_mfn(_d, gpfn))
-
 #define INVALID_MFN             (~0UL)
 
 #ifdef CONFIG_COMPAT
diff -r 1474db8058b2 -r 4633e9604da9 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/include/asm-x86/p2m.h Mon Sep 10 14:42:30 2007 +0100
@@ -4,7 +4,7 @@
  * physical-to-machine mappings for automatically-translated domains.
  *
  * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
  *
@@ -27,49 +27,141 @@
 #define _XEN_P2M_H
 
 
-/* The phys_to_machine_mapping is the reversed mapping of MPT for full
- * virtualization.  It is only used by shadow_mode_translate()==true
- * guests, so we steal the address space that would have normally
- * been used by the read-only MPT map.
+/*
+ * The phys_to_machine_mapping maps guest physical frame numbers 
+ * to machine frame numbers.  It only exists for paging_mode_translate 
+ * guests. It is organised in page-table format, which:
+ *
+ * (1) allows us to use it directly as the second pagetable in hardware-
+ *     assisted paging and (hopefully) iommu support; and 
+ * (2) lets us map it directly into the guest vcpus' virtual address space 
+ *     as a linear pagetable, so we can read and write it easily.
+ *
+ * For (2) we steal the address space that would have normally been used
+ * by the read-only MPT map in a non-translated guest.  (For 
+ * paging_mode_external() guests this mapping is in the monitor table.)
  */
 #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
 
-
-/* Read the current domain's P2M table. */
-static inline mfn_t gfn_to_mfn_current(unsigned long gfn)
-{
-    l1_pgentry_t l1e = l1e_empty();
-    int ret;
-
-    if ( gfn > current->domain->arch.p2m.max_mapped_pfn )
-        return _mfn(INVALID_MFN);
-
-    /* Don't read off the end of the p2m table */
-    ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t));
-
-    ret = __copy_from_user(&l1e,
-                           &phys_to_machine_mapping[gfn],
-                           sizeof(l1e));
-
-    if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) )
-        return _mfn(l1e_get_pfn(l1e));
-
-    return _mfn(INVALID_MFN);
+/*
+ * The upper levels of the p2m pagetable always contain full rights; all 
+ * variation in the access control bits is made in the level-1 PTEs.
+ * 
+ * In addition to the phys-to-machine translation, each p2m PTE contains
+ * *type* information about the gfn it translates, helping Xen to decide
+ * on the correct course of action when handling a page-fault to that
+ * guest frame.  We store the type in the "available" bits of the PTEs
+ * in the table, which gives us 8 possible types on 32-bit systems.
+ * Further expansions of the type system will only be supported on
+ * 64-bit Xen.
+ */
+typedef enum {
+    p2m_invalid = 0,            /* Nothing mapped here */
+    p2m_ram_rw = 1,             /* Normal read/write guest RAM */
+    p2m_ram_logdirty = 2,       /* Temporarily read-only for log-dirty */
+    p2m_ram_ro = 3,             /* Read-only; writes go to the device model */
+    p2m_mmio_dm = 4,            /* Reads and writes go to the device model */
+    p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
+} p2m_type_t;
+
+/* We use bitmaps and masks to handle groups of types */
+#define p2m_to_mask(_t) (1UL << (_t))
+
+/* RAM types, which map to real machine frames */
+#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw)          \
+                       | p2m_to_mask(p2m_ram_logdirty)  \
+                       | p2m_to_mask(p2m_ram_ro))
+
+/* MMIO types, which don't have to map to anything in the frametable */
+#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm)        \
+                        | p2m_to_mask(p2m_mmio_direct))
+
+/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
+#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
+                      | p2m_to_mask(p2m_ram_ro))
+
+/* Useful predicates */
+#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
+#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
+#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
+#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+
+/* Extract the type from the PTE flags that store it */
+static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
+{
+    /* Type is stored in the "available" bits, 9, 10 and 11 */
+    return (flags >> 9) & 0x7;
+}
+ 
+/* Read the current domain's p2m table (through the linear mapping). */
+static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
+{
+    mfn_t mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt = p2m_mmio_dm;
+    /* XXX This is for compatibility with the old model, where anything not 
+     * XXX marked as RAM was considered to be emulated MMIO space.
+     * XXX Once we start explicitly registering MMIO regions in the p2m 
+     * XXX we will return p2m_invalid for unmapped gfns */
+
+    if ( gfn <= current->domain->arch.p2m.max_mapped_pfn )
+    {
+        l1_pgentry_t l1e = l1e_empty();
+        int ret;
+
+        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
+               / sizeof(l1_pgentry_t));
+
+        /* Need to __copy_from_user because the p2m is sparse and this
+         * part might not exist */
+        ret = __copy_from_user(&l1e,
+                               &phys_to_machine_mapping[gfn],
+                               sizeof(l1e));
+
+        if ( ret == 0 ) {
+            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+            if ( p2m_is_valid(p2mt) )
+                mfn = _mfn(l1e_get_pfn(l1e));
+            else 
+                /* XXX see above */
+                p2mt = p2m_mmio_dm;
+        }
+    }
+
+    *t = p2mt;
+    return mfn;
 }
 
 /* Read another domain's P2M table, mapping pages as we go */
-mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
 
 /* General conversion function from gfn to mfn */
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), (g))
-static inline mfn_t _gfn_to_mfn(struct domain *d, unsigned long gfn)
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
+static inline mfn_t _gfn_to_mfn(struct domain *d,
+                                unsigned long gfn, p2m_type_t *t)
 {
     if ( !paging_mode_translate(d) )
+    {
+        /* Not necessarily true, but for non-translated guests, we claim
+         * it's the most generic kind of memory */
+        *t = p2m_ram_rw;
         return _mfn(gfn);
+    }
     if ( likely(current->domain == d) )
-        return gfn_to_mfn_current(gfn);
+        return gfn_to_mfn_current(gfn, t);
     else 
-        return gfn_to_mfn_foreign(d, gfn);
+        return gfn_to_mfn_foreign(d, gfn, t);
+}
+
+/* Compatibility function exporting the old untyped interface */
+static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
+{
+    mfn_t mfn;
+    p2m_type_t t;
+    mfn = gfn_to_mfn(d, gpfn, &t);
+    if ( p2m_is_valid(t) )
+        return mfn_x(mfn);
+    return INVALID_MFN;
 }
 
 /* General conversion function from mfn to gfn */
@@ -81,19 +173,6 @@ static inline unsigned long mfn_to_gfn(s
         return mfn_x(mfn);
 }
 
-/* Compatibility function for HVM code */
-static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
-{
-    return mfn_x(gfn_to_mfn_current(pfn));
-}
-
-/* Is this guest address an mmio one? (i.e. not defined in p2m map) */
-static inline int mmio_space(paddr_t gpa)
-{
-    unsigned long gfn = gpa >> PAGE_SHIFT;
-    return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn)));
-}
-
 /* Translate the frame number held in an l1e from guest to machine */
 static inline l1_pgentry_t
 gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
@@ -105,7 +184,6 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
 }
 
 
-
 /* Init the datastructures for later use by the p2m code */
 void p2m_init(struct domain *d);
 
@@ -130,11 +208,12 @@ void guest_physmap_remove_page(struct do
 void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
                                unsigned long mfn);
 
-/* set P2M table l1e flags */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags);
-
-/* set P2M table l1e flags for a gpa */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags);
+/* Change types across all p2m entries in a domain */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
+
+/* Compare-exchange the type of a single p2m entry */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
+                           p2m_type_t ot, p2m_type_t nt);
 
 #endif /* _XEN_P2M_H */
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.