|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [V11 PATCH 3/4] pvh dom0: Add and remove foreign pages
In this patch, a new function, p2m_add_foreign(), is added
to map pages from a foreign guest into dom0 for various purposes
such as domU creation, running xentrace, etc. Such pages are
typed p2m_map_foreign. Note that, by their nature, a refcnt is held
on such pages for as long as they stay in the p2m. The
refcnt is taken and released in the low level ept function
atomic_write_ept_entry. That macro is converted to a function to allow
for such refcounting, which only applies to leaf entries in the ept.
Also, get_pg_owner() is changed to allow pvh domains to create foreign
mappings, and, together with put_pg_owner(), is made public so that
p2m_add_foreign() can use them.
Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx>
---
xen/arch/x86/mm.c | 9 ++--
xen/arch/x86/mm/p2m-ept.c | 70 ++++++++++++++++++++------
xen/arch/x86/mm/p2m-pt.c | 7 +++
xen/arch/x86/mm/p2m.c | 122 +++++++++++++++++++++++++++++++++++++++++++---
xen/include/asm-x86/mm.h | 2 +
xen/include/asm-x86/p2m.h | 7 +++
6 files changed, 192 insertions(+), 25 deletions(-)
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1a8a5e0..8d12c30 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2795,7 +2795,7 @@ int new_guest_cr3(unsigned long mfn)
return rc;
}
-static struct domain *get_pg_owner(domid_t domid)
+struct domain *get_pg_owner(domid_t domid)
{
struct domain *pg_owner = NULL, *curr = current->domain;
@@ -2811,7 +2811,7 @@ static struct domain *get_pg_owner(domid_t domid)
goto out;
}
- if ( unlikely(paging_mode_translate(curr)) )
+ if ( !is_pvh_domain(curr) && unlikely(paging_mode_translate(curr)) )
{
MEM_LOG("Cannot mix foreign mappings with translated domains");
goto out;
@@ -2838,7 +2838,7 @@ static struct domain *get_pg_owner(domid_t domid)
return pg_owner;
}
-static void put_pg_owner(struct domain *pg_owner)
+void put_pg_owner(struct domain *pg_owner)
{
rcu_unlock_domain(pg_owner);
}
@@ -4584,6 +4584,9 @@ int xenmem_add_to_physmap_one(
page = mfn_to_page(mfn);
break;
}
+ case XENMAPSPACE_gmfn_foreign:
+ rc = p2m_add_foreign(d, idx, gpfn, foreign_domid);
+ return rc;
default:
break;
}
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 7926bc4..eb80118 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -36,8 +36,6 @@
#define atomic_read_ept_entry(__pepte) \
( (ept_entry_t) { .epte = read_atomic(&(__pepte)->epte) } )
-#define atomic_write_ept_entry(__pepte, __epte) \
- write_atomic(&(__pepte)->epte, (__epte).epte)
#define is_epte_present(ept_entry) ((ept_entry)->epte & 0x7)
#define is_epte_superpage(ept_entry) ((ept_entry)->sp)
@@ -46,6 +44,47 @@ static inline bool_t is_epte_valid(ept_entry_t *e)
return (e->epte != 0 && e->sa_p2mt != p2m_invalid);
}
+/* returns : 0 for success, -errno otherwise */
+static int atomic_write_ept_entry(ept_entry_t *entryptr, ept_entry_t new,
+ int level)
+{
+ bool_t same_mfn = (new.mfn == entryptr->mfn);
+ unsigned long oldmfn = INVALID_MFN;
+
+ if ( level )
+ {
+ ASSERT(!p2m_is_foreign(new.sa_p2mt));
+ write_atomic(&entryptr->epte, new.epte);
+ return 0;
+ }
+
+ if ( unlikely(p2m_is_foreign(new.sa_p2mt)) && !same_mfn )
+ {
+ struct domain *fdom;
+
+ if ( !mfn_valid(new.mfn) )
+ return -EINVAL;
+
+ fdom = page_get_owner(mfn_to_page(new.mfn));
+ if ( fdom == NULL )
+ return -ESRCH;
+
+ /* get refcount on the page */
+ if ( !get_page(mfn_to_page(new.mfn), fdom) )
+ return -EBUSY;
+ }
+
+ if ( unlikely(p2m_is_foreign(entryptr->sa_p2mt)) && !same_mfn )
+ oldmfn = entryptr->mfn;
+
+ write_atomic(&entryptr->epte, new.epte);
+
+ if ( unlikely(oldmfn != INVALID_MFN) )
+ put_page(mfn_to_page(oldmfn));
+
+ return 0;
+}
+
static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type,
p2m_access_t access)
{
/* First apply type permissions */
@@ -271,7 +310,7 @@ static int ept_next_level(struct p2m_domain *p2m, bool_t
read_only,
return GUEST_TABLE_NORMAL_PAGE;
}
-static bool_t ept_invalidate_emt(mfn_t mfn)
+static bool_t ept_invalidate_emt(mfn_t mfn, int level)
{
ept_entry_t *epte = map_domain_page(mfn_x(mfn));
unsigned int i;
@@ -286,7 +325,7 @@ static bool_t ept_invalidate_emt(mfn_t mfn)
continue;
e.emt = MTRR_NUM_TYPES;
- atomic_write_ept_entry(&epte[i], e);
+ atomic_write_ept_entry(&epte[i], e, level);
changed = 1;
}
@@ -341,7 +380,7 @@ bool_t ept_handle_misconfig(uint64_t gpa)
_mfn(e.mfn), 0, &ipat,
e.sa_p2mt == p2m_mmio_direct);
e.ipat = ipat;
- atomic_write_ept_entry(&epte[i], e);
+ atomic_write_ept_entry(&epte[i], e, level);
}
}
else
@@ -353,7 +392,7 @@ bool_t ept_handle_misconfig(uint64_t gpa)
{
if ( ept_split_super_page(p2m, &e, level, level - 1) )
{
- atomic_write_ept_entry(&epte[i], e);
+ atomic_write_ept_entry(&epte[i], e, level);
unmap_domain_page(epte);
mfn = e.mfn;
continue;
@@ -364,7 +403,7 @@ bool_t ept_handle_misconfig(uint64_t gpa)
}
e.emt = emt;
e.ipat = ipat;
- atomic_write_ept_entry(&epte[i], e);
+ atomic_write_ept_entry(&epte[i], e, level);
}
okay = 1;
@@ -374,10 +413,10 @@ bool_t ept_handle_misconfig(uint64_t gpa)
if ( e.emt == MTRR_NUM_TYPES )
{
ASSERT(is_epte_present(&e));
- ept_invalidate_emt(_mfn(e.mfn));
+ ept_invalidate_emt(_mfn(e.mfn), level);
smp_wmb();
e.emt = 0;
- atomic_write_ept_entry(&epte[i], e);
+ atomic_write_ept_entry(&epte[i], e, level);
unmap_domain_page(epte);
okay = 1;
}
@@ -444,6 +483,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn,
mfn_t mfn,
ASSERT((target == 2 && hvm_hap_has_1gb()) ||
(target == 1 && hvm_hap_has_2mb()) ||
(target == 0));
+ ASSERT(!p2m_is_foreign(p2mt) || target == 0);
table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
@@ -507,7 +547,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn,
mfn_t mfn,
/* now install the newly split ept sub-tree */
/* NB: please make sure domian is paused and no in-fly VT-d DMA. */
- atomic_write_ept_entry(ept_entry, split_ept_entry);
+ atomic_write_ept_entry(ept_entry, split_ept_entry, i);
/* then move to the level we want to make real changes */
for ( ; i > target; i-- )
@@ -546,10 +586,10 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn,
mfn_t mfn,
ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
}
- atomic_write_ept_entry(ept_entry, new_entry);
+ rc = atomic_write_ept_entry(ept_entry, new_entry, i);
/* Track the highest gfn for which we have ever had a valid mapping */
- if ( p2mt != p2m_invalid &&
+ if ( rc == 0 && p2mt != p2m_invalid &&
(gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) )
p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
@@ -581,7 +621,7 @@ out:
last thing we do, after the ept_sync_domain() and removal
from the iommu tables, so as to avoid a potential
use-after-free. */
- if ( is_epte_present(&old_entry) )
+ if ( rc == 0 && is_epte_present(&old_entry) )
ept_free_entry(p2m, &old_entry, target);
return rc;
@@ -761,7 +801,7 @@ static void ept_change_entry_type_page(mfn_t ept_page_mfn,
int ept_page_level,
e.sa_p2mt = nt;
ept_p2m_type_to_flags(&e, nt, e.access);
- atomic_write_ept_entry(&epte[i], e);
+ atomic_write_ept_entry(&epte[i], e, ept_page_level);
}
}
@@ -792,7 +832,7 @@ static void ept_memory_type_changed(struct p2m_domain *p2m)
if ( !mfn )
return;
- if ( ept_invalidate_emt(_mfn(mfn)) )
+ if ( ept_invalidate_emt(_mfn(mfn), ept_get_wl(&p2m->ept)) )
ept_sync_domain(p2m);
}
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index 56a1593..1bbf0fe 100644
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -308,6 +308,13 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long
gfn, mfn_t mfn,
__trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
}
+ if ( p2m_is_foreign(p2mt) )
+ {
+ /* pvh fixme: foreign types are only supported on ept at present */
+ gdprintk(XENLOG_WARNING, "Unimplemented foreign p2m type.\n");
+ return -EINVAL;
+ }
+
table = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m))));
rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
L4_PAGETABLE_SHIFT - PAGE_SHIFT,
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 5923968..b1e23b0 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -36,6 +36,7 @@
#include <xen/event.h>
#include <asm/hvm/nestedhvm.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
+#include <xsm/xsm.h>
#include "mm-locks.h"
@@ -287,14 +288,20 @@ struct page_info *get_page_from_gfn_p2m(
/* Fast path: look up and get out */
p2m_read_lock(p2m);
mfn = __get_gfn_type_access(p2m, gfn, t, a, 0, NULL, 0);
- if ( (p2m_is_ram(*t) || p2m_is_grant(*t))
- && mfn_valid(mfn)
+ if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
&& !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
{
page = mfn_to_page(mfn);
- if ( !get_page(page, d)
- /* Page could be shared */
- && !get_page(page, dom_cow) )
+ if ( p2m_is_foreign(*t) )
+ {
+ struct domain *fdom = page_get_owner_and_reference(page);
+ ASSERT(fdom != d);
+ if ( fdom == NULL )
+ page = NULL;
+ }
+ else if ( !get_page(page, d)
+ /* Page could be shared */
+ && !get_page(page, dom_cow) )
page = NULL;
}
p2m_read_unlock(p2m);
@@ -444,6 +451,10 @@ int p2m_alloc_table(struct p2m_domain *p2m)
return rc;
}
+/*
+ * pvh fixme: when adding support for pvh non-hardware domains, this path must
+ * cleanup any foreign p2m types (release refcnts on them).
+ */
void p2m_teardown(struct p2m_domain *p2m)
/* Return all the p2m pages to Xen.
* We know we don't have any extra mappings to these pages */
@@ -795,8 +806,8 @@ static int set_typed_p2m_entry(struct domain *d, unsigned
long gfn, mfn_t mfn,
}
/* Set foreign mfn in the given guest's p2m table. */
-static int __attribute__((unused))
-set_foreign_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
+ mfn_t mfn)
{
return set_typed_p2m_entry(d, gfn, mfn, p2m_map_foreign);
}
@@ -1757,6 +1768,103 @@ out_p2m_audit:
#endif /* P2M_AUDIT */
/*
+ * Add frame from foreign domain to target domain's physmap. Similar to
+ * XENMAPSPACE_gmfn but the frame is foreign being mapped into current,
+ * and is not removed from foreign domain.
+ *
+ * Usage: - libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap.
+ * - xentrace running on dom0 mapping xenheap pages. foreigndom would
+ * be DOMID_XEN in such a case.
+ * etc..
+ *
+ * Side Effect: the mfn for fgfn will be refcounted in lower level routines
+ * so it is not lost while mapped here. The refcnt is released
+ * via the XENMEM_remove_from_physmap path.
+ *
+ * Returns: 0 ==> success
+ */
+int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
+ unsigned long gpfn, domid_t foreigndom)
+{
+ p2m_type_t p2mt, p2mt_prev;
+ unsigned long prev_mfn, mfn;
+ struct page_info *page;
+ int rc = -EINVAL;
+ struct domain *fdom = NULL;
+
+ ASSERT(tdom);
+ if ( foreigndom == DOMID_SELF || !is_pvh_domain(tdom) )
+ return -EINVAL;
+
+ /*
+ * pvh fixme: until support is added to p2m teardown code to cleanup any
+ * foreign entries, limit this to hardware domain only.
+ */
+ if ( !is_hardware_domain(tdom) )
+ return -EPERM;
+
+ fdom = get_pg_owner(foreigndom);
+ if ( fdom == NULL )
+ return -ESRCH;
+
+ if ( tdom == fdom )
+ goto out;
+
+ rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
+ if ( rc )
+ goto out;
+
+ /*
+ * Take a refcnt on the mfn. NB: following supported for foreign mapping:
+ * ram_rw | ram_logdirty | ram_ro | paging_out.
+ */
+ page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
+ if ( !page ||
+ !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) || p2m_is_hole(p2mt) )
+ {
+ if ( page )
+ put_page(page);
+ goto out;
+ }
+ mfn = mfn_x(page_to_mfn(page));
+
+ /* Remove previously mapped page if it is present. */
+ prev_mfn = mfn_x(get_gfn(tdom, gpfn, &p2mt_prev));
+ if ( mfn_valid(_mfn(prev_mfn)) )
+ {
+ if ( is_xen_heap_mfn(prev_mfn) )
+ /* Xen heap frames are simply unhooked from this phys slot */
+ guest_physmap_remove_page(tdom, gpfn, prev_mfn, 0);
+ else
+ /* Normal domain memory is freed, to avoid leaking memory. */
+ guest_remove_page(tdom, gpfn);
+ }
+ /*
+ * Create the new mapping. Can't use guest_physmap_add_page() because it
+ * will update the m2p table which will result in mfn -> gpfn of dom0
+ * and not fgfn of domU.
+ */
+ rc = set_foreign_p2m_entry(tdom, gpfn, _mfn(mfn));
+ if ( rc )
+ gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. "
+ "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n",
+ gpfn, mfn, fgfn, tdom->domain_id, fdom->domain_id);
+
+ put_page(page);
+
+ /*
+ * This put_gfn for the above get_gfn for prev_mfn. We must do this
+ * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn
+ * before us.
+ */
+ put_gfn(tdom, gpfn);
+
+out:
+ if ( fdom )
+ put_pg_owner(fdom);
+ return rc;
+}
+/*
* Local variables:
* mode: C
* c-file-style: "BSD"
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 7059adc..6bf6e72 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -358,6 +358,8 @@ int put_old_guest_table(struct vcpu *);
int get_page_from_l1e(
l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner);
+struct domain *get_pg_owner(domid_t domid);
+void put_pg_owner(struct domain *pg_owner);
static inline void put_page_and_type(struct page_info *page)
{
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 86847e9..72517d2 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -183,6 +183,10 @@ typedef unsigned int p2m_query_t;
#define p2m_is_broken(_t) (p2m_to_mask(_t) & P2M_BROKEN_TYPES)
#define p2m_is_foreign(_t) (p2m_to_mask(_t) & p2m_to_mask(p2m_map_foreign))
+#define p2m_is_any_ram(_t) (p2m_to_mask(_t) & \
+ (P2M_RAM_TYPES | P2M_GRANT_TYPES | \
+ p2m_to_mask(p2m_map_foreign)))
+
/* Per-p2m-table state */
struct p2m_domain {
/* Lock that protects updates to the p2m */
@@ -515,6 +519,9 @@ void p2m_memory_type_changed(struct domain *d);
int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);
+/* Add foreign mapping to the guest's p2m table. */
+int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
+ unsigned long gpfn, domid_t foreign_domid);
/*
* Populate-on-demand
--
1.8.3.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |