[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [V13 PATCH 1/2] pvh dom0: Add and remove foreign pages



This patch adds a new function, p2m_add_foreign(), which maps pages
from a foreign guest into dom0 for purposes such as domU creation or
running xentrace. Such pages are typed p2m_map_foreign. By their
nature, a refcnt is held on such pages for as long as they stay in
the p2m. The refcnt is taken and released in the low-level ept function
atomic_write_ept_entry(), which is converted from a macro to a function
to allow for such refcounting; the refcounting applies only to leaf
entries in the ept.
Furthermore, please note that paging/sharing is disabled if the
controlling or hardware domain is pvh. Any enabling of those features
would need to ensure that refcnts are properly maintained for foreign
types, or that paging/sharing is skipped for foreign types.

Also, get_pg_owner() is changed so that it allows foreign mappings when
the calling domain is pvh.

Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx>
---
 xen/arch/x86/mm.c           |   4 +-
 xen/arch/x86/mm/mem_event.c |  11 ++++
 xen/arch/x86/mm/p2m-ept.c   |  98 +++++++++++++++++++++++++++-------
 xen/arch/x86/mm/p2m-pt.c    |   7 +++
 xen/arch/x86/mm/p2m.c       | 126 +++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-x86/p2m.h   |   7 +++
 6 files changed, 226 insertions(+), 27 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1a8a5e0..d005c34 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2811,7 +2811,7 @@ static struct domain *get_pg_owner(domid_t domid)
         goto out;
     }
 
-    if ( unlikely(paging_mode_translate(curr)) )
+    if ( !is_pvh_domain(curr) && unlikely(paging_mode_translate(curr)) )
     {
         MEM_LOG("Cannot mix foreign mappings with translated domains");
         goto out;
@@ -4584,6 +4584,8 @@ int xenmem_add_to_physmap_one(
             page = mfn_to_page(mfn);
             break;
         }
+        case XENMAPSPACE_gmfn_foreign:
+            return p2m_add_foreign(d, idx, gpfn, foreign_domid);
         default:
             break;
     }
diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
index 36b9dba..7722dcb 100644
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -538,6 +538,12 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
         case XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE:
         {
             struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+            rc = -EOPNOTSUPP;
+            /* pvh fixme: p2m_is_foreign types need addressing */
+            if ( is_pvh_vcpu(current) || is_pvh_domain(hardware_domain) )
+                break;
+
             rc = -ENODEV;
             /* Only HAP is supported */
             if ( !hap_enabled(d) )
@@ -620,6 +626,11 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
         {
         case XEN_DOMCTL_MEM_EVENT_OP_SHARING_ENABLE:
         {
+            rc = -EOPNOTSUPP;
+            /* pvh fixme: p2m_is_foreign types need addressing */
+            if ( is_pvh_vcpu(current) || is_pvh_domain(hardware_domain) )
+                break;
+
             rc = -ENODEV;
             /* Only HAP is supported */
             if ( !hap_enabled(d) )
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index bb98945..fe6bcd8 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -36,8 +36,6 @@
 
 #define atomic_read_ept_entry(__pepte)                              \
     ( (ept_entry_t) { .epte = read_atomic(&(__pepte)->epte) } )
-#define atomic_write_ept_entry(__pepte, __epte)                     \
-    write_atomic(&(__pepte)->epte, (__epte).epte)
 
 #define is_epte_present(ept_entry)      ((ept_entry)->epte & 0x7)
 #define is_epte_superpage(ept_entry)    ((ept_entry)->sp)
@@ -46,6 +44,58 @@ static inline bool_t is_epte_valid(ept_entry_t *e)
     return (e->epte != 0 && e->sa_p2mt != p2m_invalid);
 }
 
+/* returns : 0 for success, -errno otherwise */
+static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new,
+                                  int level)
+{
+    int rc = 0;
+    unsigned long oldmfn = INVALID_MFN;
+    bool_t skip_foreign = (new.mfn == entryptr->mfn &&
+                           new.sa_p2mt == entryptr->sa_p2mt);
+
+    if ( level )
+    {
+        ASSERT(!is_epte_superpage(&new) || !p2m_is_foreign(new.sa_p2mt));
+        write_atomic(&entryptr->epte, new.epte);
+        goto out;
+    }
+
+    if ( unlikely(p2m_is_foreign(new.sa_p2mt)) && !skip_foreign )
+    {
+        struct domain *fdom;
+
+        rc = -EINVAL;
+        if ( !mfn_valid(new.mfn) )
+            goto out;
+
+        rc = -ESRCH;
+        fdom = page_get_owner(mfn_to_page(new.mfn));
+        if ( fdom == NULL )
+            goto out;
+
+        /* get refcount on the page */
+        rc = -EBUSY;
+        if ( !get_page(mfn_to_page(new.mfn), fdom) )
+            goto out;
+    }
+
+    if ( unlikely(p2m_is_foreign(entryptr->sa_p2mt)) && !skip_foreign )
+        oldmfn = entryptr->mfn;
+
+    write_atomic(&entryptr->epte, new.epte);
+
+    if ( unlikely(oldmfn != INVALID_MFN) )
+        put_page(mfn_to_page(oldmfn));
+
+    rc = 0;
+
+ out:
+    if ( rc )
+        gdprintk(XENLOG_ERR, "epte o:%"PRIx64" n:%"PRIx64" rc:%d\n",
+                 entryptr->epte, new.epte, rc);
+    return rc;
+}
+
 static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type, p2m_access_t access)
 {
     /* First apply type permissions */
@@ -275,8 +325,9 @@ static int ept_next_level(struct p2m_domain *p2m, bool_t read_only,
  * present entries in the given page table, optionally marking the entries
  * also for their subtrees needing P2M type re-calculation.
  */
-static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc)
+static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc, int level)
 {
+    int rc;
     ept_entry_t *epte = map_domain_page(mfn_x(mfn));
     unsigned int i;
     bool_t changed = 0;
@@ -292,7 +343,8 @@ static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc)
         e.emt = MTRR_NUM_TYPES;
         if ( recalc )
             e.recalc = 1;
-        atomic_write_ept_entry(&epte[i], e);
+        rc = atomic_write_ept_entry(&epte[i], e, level);
+        ASSERT(rc == 0);
         changed = 1;
     }
 
@@ -316,7 +368,7 @@ static int ept_invalidate_emt_range(struct p2m_domain *p2m,
     ept_entry_t *table;
     unsigned long gfn_remainder = first_gfn;
     unsigned int i, index;
-    int rc = 0, ret = GUEST_TABLE_MAP_FAILED;
+    int wrc, rc = 0, ret = GUEST_TABLE_MAP_FAILED;
 
     table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
     for ( i = ept_get_wl(&p2m->ept); i > target; --i )
@@ -342,7 +394,8 @@ static int ept_invalidate_emt_range(struct p2m_domain *p2m,
             rc = -ENOMEM;
             goto out;
         }
-        atomic_write_ept_entry(&table[index], split_ept_entry);
+        wrc = atomic_write_ept_entry(&table[index], split_ept_entry, i);
+        ASSERT(wrc == 0);
 
         for ( ; i > target; --i )
             if ( !ept_next_level(p2m, 1, &table, &gfn_remainder, i) )
@@ -361,7 +414,8 @@ static int ept_invalidate_emt_range(struct p2m_domain *p2m,
         {
             e.emt = MTRR_NUM_TYPES;
             e.recalc = 1;
-            atomic_write_ept_entry(&table[index], e);
+            wrc = atomic_write_ept_entry(&table[index], e, target);
+            ASSERT(wrc == 0);
             rc = 1;
         }
     }
@@ -390,7 +444,7 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
     unsigned int level = ept_get_wl(ept);
     unsigned long mfn = ept_get_asr(ept);
     ept_entry_t *epte;
-    int rc = 0;
+    int wrc, rc = 0;
 
     if ( !mfn )
         return 0;
@@ -431,7 +485,8 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                          ept_p2m_type_to_flags(&e, e.sa_p2mt, e.access);
                     }
                     e.recalc = 0;
-                    atomic_write_ept_entry(&epte[i], e);
+                    wrc = atomic_write_ept_entry(&epte[i], e, level);
+                    ASSERT(wrc == 0);
                 }
             }
             else
@@ -465,7 +520,8 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                 {
                     if ( ept_split_super_page(p2m, &e, level, level - 1) )
                     {
-                        atomic_write_ept_entry(&epte[i], e);
+                        wrc = atomic_write_ept_entry(&epte[i], e, level);
+                        ASSERT(wrc == 0);
                         unmap_domain_page(epte);
                         mfn = e.mfn;
                         continue;
@@ -479,7 +535,8 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                 e.recalc = 0;
                 if ( recalc && p2m_is_changeable(e.sa_p2mt) )
                     ept_p2m_type_to_flags(&e, e.sa_p2mt, e.access);
-                atomic_write_ept_entry(&epte[i], e);
+                wrc = atomic_write_ept_entry(&epte[i], e, level);
+                ASSERT(wrc == 0);
             }
 
             rc = 1;
@@ -489,11 +546,12 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
         if ( e.emt == MTRR_NUM_TYPES )
         {
             ASSERT(is_epte_present(&e));
-            ept_invalidate_emt(_mfn(e.mfn), e.recalc);
+            ept_invalidate_emt(_mfn(e.mfn), e.recalc, level);
             smp_wmb();
             e.emt = 0;
             e.recalc = 0;
-            atomic_write_ept_entry(&epte[i], e);
+            wrc = atomic_write_ept_entry(&epte[i], e, level);
+            ASSERT(wrc == 0);
             unmap_domain_page(epte);
             rc = 1;
         }
@@ -585,6 +643,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
     ASSERT((target == 2 && hvm_hap_has_1gb()) ||
            (target == 1 && hvm_hap_has_2mb()) ||
            (target == 0));
+    ASSERT(!p2m_is_foreign(p2mt) || target == 0);
 
     table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
 
@@ -649,7 +708,8 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
 
         /* now install the newly split ept sub-tree */
         /* NB: please make sure domian is paused and no in-fly VT-d DMA. */
-        atomic_write_ept_entry(ept_entry, split_ept_entry);
+        rc = atomic_write_ept_entry(ept_entry, split_ept_entry, i);
+        ASSERT(rc == 0);
 
         /* then move to the level we want to make real changes */
         for ( ; i > target; i-- )
@@ -688,10 +748,10 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
     }
 
-    atomic_write_ept_entry(ept_entry, new_entry);
+    rc = atomic_write_ept_entry(ept_entry, new_entry, target);
 
     /* Track the highest gfn for which we have ever had a valid mapping */
-    if ( p2mt != p2m_invalid &&
+    if ( rc == 0 && p2mt != p2m_invalid &&
          (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) )
         p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
 
@@ -723,7 +783,7 @@ out:
        last thing we do, after the ept_sync_domain() and removal
        from the iommu tables, so as to avoid a potential
        use-after-free. */
-    if ( is_epte_present(&old_entry) )
+    if ( rc == 0 && is_epte_present(&old_entry) )
         ept_free_entry(p2m, &old_entry, target);
 
     return rc;
@@ -893,7 +953,7 @@ static void ept_change_entry_type_global(struct p2m_domain *p2m,
     if ( !mfn )
         return;
 
-    if ( ept_invalidate_emt(_mfn(mfn), 1) )
+    if ( ept_invalidate_emt(_mfn(mfn), 1, ept_get_wl(&p2m->ept)) )
         ept_sync_domain(p2m);
 }
 
@@ -951,7 +1011,7 @@ static void ept_memory_type_changed(struct p2m_domain *p2m)
     if ( !mfn )
         return;
 
-    if ( ept_invalidate_emt(_mfn(mfn), 0) )
+    if ( ept_invalidate_emt(_mfn(mfn), 0, ept_get_wl(&p2m->ept)) )
         ept_sync_domain(p2m);
 }
 
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index cd9867a..a1794d0 100644
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -513,6 +513,13 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
         __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
     }
 
+    if ( unlikely(p2m_is_foreign(p2mt)) )
+    {
+        /* pvh fixme: foreign types are only supported on ept at present */
+        gdprintk(XENLOG_WARNING, "Unimplemented foreign p2m type.\n");
+        return -EINVAL;
+    }
+
     /* Carry out any eventually pending earlier changes first. */
     rc = do_recalc(p2m, gfn);
     if ( rc < 0 )
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index b50747a..642ec28 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -36,6 +36,7 @@
 #include <xen/event.h>
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
+#include <xsm/xsm.h>
 
 #include "mm-locks.h"
 
@@ -311,14 +312,20 @@ struct page_info *get_page_from_gfn_p2m(
         /* Fast path: look up and get out */
         p2m_read_lock(p2m);
         mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
-        if ( (p2m_is_ram(*t) || p2m_is_grant(*t))
-             && mfn_valid(mfn)
+        if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
              && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
         {
             page = mfn_to_page(mfn);
-            if ( !get_page(page, d)
-                 /* Page could be shared */
-                 && !get_page(page, dom_cow) )
+            if ( unlikely(p2m_is_foreign(*t)) )
+            {
+                struct domain *fdom = page_get_owner_and_reference(page);
+                ASSERT(fdom != d);
+                if ( fdom == NULL )
+                    page = NULL;
+            }
+            else if ( !get_page(page, d)
+                      /* Page could be shared */
+                      && !get_page(page, dom_cow) )
                 page = NULL;
         }
         p2m_read_unlock(p2m);
@@ -468,6 +475,10 @@ int p2m_alloc_table(struct p2m_domain *p2m)
     return rc;
 }
 
+/*
+ * pvh fixme: when adding support for pvh non-hardware domains, this path must
+ * cleanup any foreign p2m types (release refcnts on them).
+ */
 void p2m_teardown(struct p2m_domain *p2m)
 /* Return all the p2m pages to Xen.
  * We know we don't have any extra mappings to these pages */
@@ -836,8 +847,8 @@ static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
 }
 
 /* Set foreign mfn in the given guest's p2m table. */
-static int __attribute__((unused))
-set_foreign_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
+                                 mfn_t mfn)
 {
     return set_typed_p2m_entry(d, gfn, mfn, p2m_map_foreign);
 }
@@ -1794,6 +1805,107 @@ out_p2m_audit:
 #endif /* P2M_AUDIT */
 
 /*
+ * Add frame from foreign domain to target domain's physmap. Similar to
+ * XENMAPSPACE_gmfn but the frame is foreign being mapped into current,
+ * and is not removed from foreign domain.
+ *
+ * Usage: - libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap.
+ *        - xentrace running on dom0 mapping xenheap pages. foreigndom would
+ *          be DOMID_XEN in such a case.
+ *        etc..
+ *
+ * Side Effect: the mfn for fgfn will be refcounted in lower level routines
+ *              so it is not lost while mapped here. The refcnt is released
+ *              via the XENMEM_remove_from_physmap path.
+ *
+ * Returns: 0 ==> success
+ */
+int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
+                    unsigned long gpfn, domid_t foreigndom)
+{
+    p2m_type_t p2mt, p2mt_prev;
+    unsigned long prev_mfn, mfn;
+    struct page_info *page;
+    int rc;
+    struct domain *fdom;
+
+    ASSERT(tdom);
+    if ( foreigndom == DOMID_SELF || !is_pvh_domain(tdom) )
+        return -EINVAL;
+    /*
+     * pvh fixme: until support is added to p2m teardown code to cleanup any
+     * foreign entries, limit this to hardware domain only.
+     */
+    if ( !is_hardware_domain(tdom) )
+        return -EPERM;
+
+    if ( foreigndom == DOMID_XEN )
+        fdom = rcu_lock_domain(dom_xen);
+    else
+        fdom = rcu_lock_domain_by_id(foreigndom);
+    if ( fdom == NULL )
+        return -ESRCH;
+
+    rc = -EINVAL;
+    if ( tdom == fdom )
+        goto out;
+
+    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
+    if ( rc )
+        goto out;
+
+    /*
+     * Take a refcnt on the mfn. NB: following supported for foreign mapping:
+     *     ram_rw | ram_logdirty | ram_ro | paging_out.
+     */
+    page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
+    if ( !page ||
+         !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) || p2m_is_hole(p2mt) )
+    {
+        if ( page )
+            put_page(page);
+        rc = -EINVAL;
+        goto out;
+    }
+    mfn = mfn_x(page_to_mfn(page));
+
+    /* Remove previously mapped page if it is present. */
+    prev_mfn = mfn_x(get_gfn(tdom, gpfn, &p2mt_prev));
+    if ( mfn_valid(_mfn(prev_mfn)) )
+    {
+        if ( is_xen_heap_mfn(prev_mfn) )
+            /* Xen heap frames are simply unhooked from this phys slot */
+            guest_physmap_remove_page(tdom, gpfn, prev_mfn, 0);
+        else
+            /* Normal domain memory is freed, to avoid leaking memory. */
+            guest_remove_page(tdom, gpfn);
+    }
+    /*
+     * Create the new mapping. Can't use guest_physmap_add_page() because it
+     * will update the m2p table which will result in  mfn -> gpfn of dom0
+     * and not fgfn of domU.
+     */
+    rc = set_foreign_p2m_entry(tdom, gpfn, _mfn(mfn));
+    if ( rc )
+        gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. "
+                 "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n",
+                 gpfn, mfn, fgfn, tdom->domain_id, fdom->domain_id);
+
+    put_page(page);
+
+    /*
+     * This put_gfn for the above get_gfn for prev_mfn.  We must do this
+     * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn
+     * before us.
+     */
+    put_gfn(tdom, gpfn);
+
+out:
+    if ( fdom )
+        rcu_unlock_domain(fdom);
+    return rc;
+}
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 027f011..d0cfdac 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -188,6 +188,10 @@ typedef unsigned int p2m_query_t;
 #define p2m_is_broken(_t)   (p2m_to_mask(_t) & P2M_BROKEN_TYPES)
 #define p2m_is_foreign(_t)  (p2m_to_mask(_t) & p2m_to_mask(p2m_map_foreign))
 
+#define p2m_is_any_ram(_t)  (p2m_to_mask(_t) &                   \
+                             (P2M_RAM_TYPES | P2M_GRANT_TYPES |  \
+                              p2m_to_mask(p2m_map_foreign)))
+
 /* Per-p2m-table state */
 struct p2m_domain {
     /* Lock that protects updates to the p2m */
@@ -532,6 +536,9 @@ int p2m_is_logdirty_range(struct p2m_domain *, unsigned long start,
 int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
 int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);
 
+/* Add foreign mapping to the guest's p2m table. */
+int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
+                    unsigned long gpfn, domid_t foreign_domid);
 
 /* 
  * Populate-on-demand
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.