
[Xen-devel] [PATCH RFC 2/4] x86/mem_access: mem_access and mem_event changes to support PV domains



mem_access changes
------------------
Two new memory hypercalls, XENMEM_access_op_create_ring_page and
XENMEM_access_op_get_ring_mfn, have been added. The mem_access listener
makes these calls during setup. The ring page is allocated from the
xenheap and shared with the guest. It is freed when mem_access is
disabled or when the domain is shut down or destroyed.
The XENMEM_access_op_[sg]et_access hypercalls are modified to
accommodate calls for a PV domain. When setting the access permissions
for an mfn, all shadows of that mfn are dropped; they are recreated
with the new permissions on the next page fault for that mfn. When
getting the permissions for an mfn, the value is returned from the
p2m_access lookup table.
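
To illustrate the intended flow, below is a minimal sketch of how a PV
mem_access listener might drive the two new ops during setup. The
issue_memory_op() wrapper and the pv_* helpers are hypothetical and
stand in for whatever XENMEM_access_op hypercall plumbing the listener
uses (e.g. the libxc helpers added later in this series); the
xen_mem_access_op_t fields are the ones this patch relies on.

#include <stdint.h>
#include <xen/memory.h>   /* xen_mem_access_op_t, XENMEM_access_op_* */

/* Hypothetical wrapper around the XENMEM_access_op hypercall. */
int issue_memory_op(unsigned long cmd, xen_mem_access_op_t *mao);

/* Ask Xen to allocate the xenheap ring page and share it with the guest. */
static int pv_create_access_ring(domid_t domid)
{
    xen_mem_access_op_t mao = {
        .op    = XENMEM_access_op_create_ring_page,
        .domid = domid,
    };

    return issue_memory_op(XENMEM_access_op, &mao);
}

/* Retrieve the MFN of the shared ring page so the listener can map it. */
static int pv_get_access_ring_mfn(domid_t domid, uint64_t *mfn)
{
    xen_mem_access_op_t mao = {
        .op    = XENMEM_access_op_get_ring_mfn,
        .domid = domid,
    };
    int rc = issue_memory_op(XENMEM_access_op, &mao);

    if ( rc == 0 )
        *mfn = mao.pfn;   /* the ring page's MFN is returned in .pfn */

    return rc;
}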

mem_event changes
-----------------
The XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE/DISABLE hypercalls are
modified to allow mem_access to work with PV domains. When the access
listener enables mem_access for a PV domain, shadow PG_mem_access mode
is turned on and the p2m structures are initialized. When disabling,
this mode is turned off but the domain continues to run in shadow
mode. The p2m structures are also freed on disabling.
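
Putting the two halves together, a PV listener's setup and teardown
would then look roughly like the sketch below (reusing the includes and
helpers from the previous sketch). map_ring_page(), enable_access() and
disable_access() are likewise hypothetical placeholders for the
listener's ring-mapping code (e.g. xc_map_foreign_range()) and for the
ACCESS_ENABLE/DISABLE domctl wrappers.

/* Hypothetical wrappers for mapping the ring page and for the
 * XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE/DISABLE domctls. */
void *map_ring_page(domid_t domid, uint64_t mfn);
int enable_access(domid_t domid);
void disable_access(domid_t domid);

static int pv_listener_setup(domid_t domid, void **ring_page)
{
    uint64_t ring_mfn;
    int rc;

    /* 1. Have Xen allocate the ring page and share it with the guest. */
    rc = pv_create_access_ring(domid);
    if ( rc )
        return rc;

    /* 2. Learn the ring page's MFN and map it into the listener. */
    rc = pv_get_access_ring_mfn(domid, &ring_mfn);
    if ( rc )
        return rc;

    *ring_page = map_ring_page(domid, ring_mfn);
    if ( *ring_page == NULL )
        return -1;

    /*
     * 3. Enable mem_access.  For a PV domain Xen turns on shadow
     *    PG_mem_access mode and initializes the p2m structures before
     *    wiring up the ring; on failure it tears that state down again,
     *    including freeing the ring page.
     */
    return enable_access(domid);
}

static void pv_listener_teardown(domid_t domid)
{
    /*
     * Turns shadow mem_access mode off and frees the p2m structures and
     * the ring page; the domain keeps running in shadow mode.
     */
    disable_access(domid);
}

Calling disable_access() is safe even if enabling failed part-way: the
DISABLE path has to cope with resources created before a failed enable,
so it frees the PV ring page and p2m structures even when mem_access was
never fully enabled.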

Signed-off-by: Aravindh Puthiyaparambil <aravindp@xxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
Cc: Tim Deegan <tim@xxxxxxx>
---
 xen/arch/x86/domain.c            |  12 ++++
 xen/arch/x86/mm/mem_access.c     | 146 ++++++++++++++++++++++++++++++++++++---
 xen/arch/x86/mm/mem_event.c      |  77 +++++++++++++++++----
 xen/include/asm-x86/domain.h     |   3 +
 xen/include/asm-x86/mem_access.h |   4 ++
 xen/include/public/memory.h      |   2 +
 6 files changed, 222 insertions(+), 22 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 2a9c6fc..0ea03b4 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -57,6 +57,7 @@
 #include <asm/nmi.h>
 #include <asm/mce.h>
 #include <asm/amd.h>
+#include <asm/mem_access.h>
 #include <xen/numa.h>
 #include <xen/iommu.h>
 #include <compat/vcpu.h>
@@ -593,8 +594,11 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
         }
     }
     else
+    {
         /* 64-bit PV guest by default. */
         d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
+        d->arch.pv_domain.access_ring_mfn = _mfn(INVALID_MFN);
+    }
 
     /* initialize default tsc behavior in case tools don't */
     tsc_set_info(d, TSC_MODE_DEFAULT, 0UL, 0, 0);
@@ -632,8 +636,16 @@ void arch_domain_destroy(struct domain *d)
 
     free_perdomain_mappings(d);
     if ( is_pv_domain(d) )
+    {
         free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab);
 
+        /*
+         * Free the PV mem_access ring xenheap page in the case where a
+         * mem_access listener is present while the domain is being destroyed.
+         */
+        mem_access_free_pv_ring(d);
+    }
+
     free_xenheap_page(d->shared_info);
     cleanup_domain_irq_mapping(d);
 }
diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c
index 462c318..2340a46 100644
--- a/xen/arch/x86/mm/mem_access.c
+++ b/xen/arch/x86/mm/mem_access.c
@@ -26,8 +26,36 @@
 #include <xen/hypercall.h>
 #include <asm/p2m.h>
 #include <asm/mem_event.h>
+#include <asm/shadow.h>
 #include <xsm/xsm.h>
 
+/* Override macros from asm/page.h to make them work with mfn_t */
+#undef mfn_valid
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
+#undef mfn_to_page
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
+
+static inline bool_t domain_valid_for_mem_access(struct domain *d)
+{
+    if ( is_hvm_domain(d) )
+    {
+        /* Only HAP is supported */
+        if ( !hap_enabled(d) )
+            return 0;
+
+        /* Currently only EPT is supported */
+        if ( !cpu_has_vmx )
+            return 0;
+    }
+    /*
+     * Only PV guests using shadow mem_access mode and running on CPUs with
+     * the NX bit are supported
+     */
+    else if ( !shadow_mode_mem_access(d) || !cpu_has_nx )
+        return 0;
+
+    return 1;
+}
 
 int mem_access_memop(unsigned long cmd,
                      XEN_GUEST_HANDLE_PARAM(xen_mem_access_op_t) arg)
@@ -43,16 +71,14 @@ int mem_access_memop(unsigned long cmd,
     if ( rc )
         return rc;
 
-    rc = -EINVAL;
-    if ( !is_hvm_domain(d) )
-        goto out;
-
     rc = xsm_mem_event_op(XSM_TARGET, d, XENMEM_access_op);
     if ( rc )
         goto out;
 
     rc = -ENODEV;
-    if ( unlikely(!d->mem_event->access.ring_page) )
+    if ( unlikely(!d->mem_event->access.ring_page &&
+                  mao.op != XENMEM_access_op_create_ring_page &&
+                  mao.op != XENMEM_access_op_get_ring_mfn) )
         goto out;
 
     switch ( mao.op )
@@ -65,12 +91,27 @@ int mem_access_memop(unsigned long cmd,
     case XENMEM_access_op_set_access:
     {
         unsigned long start_iter = cmd & ~MEMOP_CMD_MASK;
+        unsigned long pfn = mao.pfn;
 
         rc = -EINVAL;
-        if ( (mao.pfn != ~0ull) &&
+        if ( !domain_valid_for_mem_access(d) )
+            break;
+
+        if ( unlikely(is_pv_domain(d) && pfn != ~0ull) )
+            pfn = get_gpfn_from_mfn(mao.pfn);
+
+        /*
+         * max_pfn for a PV domain is obtained from the shared_info structure
+         * that the guest maintains; it is up to the guest to keep it current,
+         * and it is not filled in during early boot. So we do not check
+         * whether we cross max_pfn here and instead rely on the checks in
+         * p2m_mem_access_set_entry().
+         */
+        if ( (pfn != ~0ull) &&
              (mao.nr < start_iter ||
-              ((mao.pfn + mao.nr - 1) < mao.pfn) ||
-              ((mao.pfn + mao.nr - 1) > domain_get_maximum_gpfn(d))) )
+              ((pfn + mao.nr - 1) < pfn) ||
+              ((pfn + mao.nr - 1) > domain_get_maximum_gpfn(d) &&
+                !is_pv_domain(d))) )
             break;
 
         rc = p2m_set_mem_access(d, mao.pfn, mao.nr, start_iter,
@@ -89,7 +130,18 @@ int mem_access_memop(unsigned long cmd,
         xenmem_access_t access;
 
         rc = -EINVAL;
-        if ( (mao.pfn > domain_get_maximum_gpfn(d)) && mao.pfn != ~0ull )
+        if ( !domain_valid_for_mem_access(d) )
+            break;
+
+        /*
+         * max_pfn for a PV domain is obtained from the shared_info structure
+         * that the guest maintains; it is up to the guest to keep it current,
+         * and it is not filled in during early boot. So we do not check
+         * whether we cross max_pfn here and instead rely on the checks in
+         * p2m_mem_access_get_entry().
+         */
+        if ( (mao.pfn > domain_get_maximum_gpfn(d) && !is_pv_domain(d)) &&
+             mao.pfn != ~0ull )
             break;
 
         rc = p2m_get_mem_access(d, mao.pfn, &access);
@@ -102,6 +154,67 @@ int mem_access_memop(unsigned long cmd,
         break;
     }
 
+    case XENMEM_access_op_create_ring_page:
+    {
+        void *access_ring_va;
+
+        /*
+         * mem_access listeners for HVM domains need not call
+         * xc_mem_access_set_ring_pfn() as the special ring page would have
+         * been set up during domain creation.
+         */
+        rc = -ENOSYS;
+        if ( is_hvm_domain(d) )
+            break;
+
+        /*
+         * The ring page was created by a mem_access listener but was not
+         * freed. Do not allow another xenheap page to be allocated.
+         */
+        if ( mfn_valid(d->arch.pv_domain.access_ring_mfn) )
+        {
+            rc = -EPERM;
+            break;
+        }
+
+        access_ring_va = alloc_xenheap_page();
+        if ( access_ring_va == NULL )
+        {
+            rc = -ENOMEM;
+            break;
+        }
+
+        clear_page(access_ring_va);
+        share_xen_page_with_guest(virt_to_page(access_ring_va), d,
+                                  XENSHARE_writable);
+
+        d->arch.pv_domain.access_ring_mfn = _mfn(virt_to_mfn(access_ring_va));
+
+        rc = 0;
+        break;
+    }
+
+    case XENMEM_access_op_get_ring_mfn:
+        /*
+         * mem_access listeners for HVM domains should call xc_get_hvm_param()
+         * instead of xc_mem_access_get_ring_pfn().
+         */
+        rc = -ENOSYS;
+        if ( is_hvm_domain(d) )
+            break;
+
+        if ( !mfn_valid(d->arch.pv_domain.access_ring_mfn) )
+        {
+            rc = -ENODEV;
+            break;
+        }
+
+        mao.pfn = mfn_x(d->arch.pv_domain.access_ring_mfn);
+        rc = 0;
+        if ( __copy_field_to_guest(arg, &mao, pfn) )
+            rc = -EFAULT;
+        break;
+
     default:
         rc = -ENOSYS;
         break;
@@ -123,6 +236,21 @@ int mem_access_send_req(struct domain *d, mem_event_request_t *req)
     return 0;
 } 
 
+/* Free the xenheap page used for the PV access ring */
+void mem_access_free_pv_ring(struct domain *d)
+{
+    struct page_info *pg = mfn_to_page(d->arch.pv_domain.access_ring_mfn);
+
+    if ( !mfn_valid(d->arch.pv_domain.access_ring_mfn) )
+        return;
+
+    BUG_ON(page_get_owner(pg) != d);
+    if ( test_and_clear_bit(_PGC_allocated, &pg->count_info) )
+        put_page(pg);
+    free_xenheap_page(mfn_to_virt(mfn_x(d->arch.pv_domain.access_ring_mfn)));
+    d->arch.pv_domain.access_ring_mfn = _mfn(INVALID_MFN);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
index 36b9dba..9b45504 100644
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -25,6 +25,7 @@
 #include <xen/event.h>
 #include <xen/wait.h>
 #include <asm/p2m.h>
+#include <asm/shadow.h>
 #include <asm/mem_event.h>
 #include <asm/mem_paging.h>
 #include <asm/mem_access.h>
@@ -49,7 +50,12 @@ static int mem_event_enable(
     xen_event_channel_notification_t notification_fn)
 {
     int rc;
-    unsigned long ring_gfn = d->arch.hvm_domain.params[param];
+    unsigned long ring_gfn;
+
+    if ( is_pv_domain(d) && param == HVM_PARAM_ACCESS_RING_PFN )
+        ring_gfn = mfn_x(d->arch.pv_domain.access_ring_mfn);
+    else
+        ring_gfn = d->arch.hvm_domain.params[param];
 
     /* Only one helper at a time. If the helper crashed,
      * the ring is in an undefined state and so is the guest.
@@ -581,28 +587,73 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
         switch( mec->op )
         {
         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE:
-        {
             rc = -ENODEV;
-            /* Only HAP is supported */
-            if ( !hap_enabled(d) )
-                break;
+            if ( !is_pv_domain(d) )
+            {
+                /* Only HAP is supported */
+                if ( !hap_enabled(d) )
+                    break;
 
-            /* Currently only EPT is supported */
-            if ( !cpu_has_vmx )
-                break;
+                /* Currently only EPT is supported */
+                if ( !cpu_has_vmx )
+                    break;
+            }
+            /* PV guests use shadow mem_access mode */
+            else
+            {
+                domain_pause(d);
+                if ( !shadow_mode_mem_access(d) )
+                {
+                    rc = shadow_enable_mem_access(d);
+                    if ( rc != 0 )
+                        goto pv_out;
+                }
+
+                rc = p2m_mem_access_init(p2m_get_hostp2m(d));
+                if ( rc != 0 )
+                {
+                    shadow_disable_mem_access(d);
+                    goto pv_out;
+                }
+            }
 
             rc = mem_event_enable(d, mec, med, _VPF_mem_access, 
                                     HVM_PARAM_ACCESS_RING_PFN,
                                     mem_access_notification);
-        }
-        break;
+
+            if ( !is_pv_domain(d) )
+                break;
+
+ pv_out:
+            if ( rc != 0 )
+            {
+                shadow_disable_mem_access(d);
+                p2m_mem_access_teardown(p2m_get_hostp2m(d));
+                mem_access_free_pv_ring(d);
+            }
+            domain_unpause(d);
+
+            break;
 
         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
-        {
             if ( med->ring_page )
                 rc = mem_event_disable(d, med);
-        }
-        break;
+
+            /*
+             * Even if mem_access was not enabled, some resources could have
+             * been created in the PV case.
+             */
+            if ( is_pv_domain(d) )
+            {
+                domain_pause(d);
+                if ( shadow_mode_mem_access(d) )
+                    shadow_disable_mem_access(d);
+
+                p2m_mem_access_teardown(p2m_get_hostp2m(d));
+                mem_access_free_pv_ring(d);
+                domain_unpause(d);
+            }
+            break;
 
         default:
             rc = -ENOSYS;
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index c5c266f..884b799 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -227,6 +227,9 @@ struct pv_domain
 
     /* map_domain_page() mapping cache. */
     struct mapcache_domain mapcache;
+
+    /* mfn of the mem_access ring page for PV domains */
+    mfn_t access_ring_mfn;
 };
 
 struct arch_domain
diff --git a/xen/include/asm-x86/mem_access.h b/xen/include/asm-x86/mem_access.h
index 5c7c5fd..c7be52c 100644
--- a/xen/include/asm-x86/mem_access.h
+++ b/xen/include/asm-x86/mem_access.h
@@ -27,6 +27,10 @@ int mem_access_memop(unsigned long cmd,
                      XEN_GUEST_HANDLE_PARAM(xen_mem_access_op_t) arg);
 int mem_access_send_req(struct domain *d, mem_event_request_t *req);
 
+
+/* Free the xenheap page used for the access ring */
+void mem_access_free_pv_ring(struct domain *d);
+
 #endif /* _XEN_ASM_MEM_ACCESS_H */
 
 /*
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 5bcd475..1070636 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -380,6 +380,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t);
 #define XENMEM_access_op_resume             0
 #define XENMEM_access_op_set_access         1
 #define XENMEM_access_op_get_access         2
+#define XENMEM_access_op_create_ring_page   3
+#define XENMEM_access_op_get_ring_mfn       4
 
 typedef enum {
     XENMEM_access_n,
-- 
1.9.1

