
[Xen-devel] [RFC] [PATCH] x86/mm: use wait queues for mem_paging



Hi, 

As promised, a revised version of the waitq-on-missing-pages patch.
I've cut it back a fair bit; I think the resulting patch is maybe a bit
less efficient, but easier to understand (there's a short note on the
wake-up trade-off after the patch).

I've smoke-tested it a bit and haven't seen anything catch fire but I
suspect the VMs aren't very happy.  I'll debug more thoroughly when I'm
back in the office next week, but would really like some feedback from
the rest of you in the meantime.
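
To recap the idea for anyone who didn't follow Olaf's earlier RFC: at
the moment a caller that hits a paged-out gfn has to notice, ping the
pager, and fail back up the stack in the hope that the operation gets
retried.  The pattern at call sites is roughly this (illustrative
only, not a hunk from the patch below):

    p2m_type_t t;
    mfn_t mfn = get_gfn(d, gfn, &t);

    if ( p2m_is_paging(t) )
    {
        put_gfn(d, gfn);
        p2m_mem_paging_populate(d, gfn);
        /* Error value varies by call site; the whole operation has
         * to be retried once the pager has done its work. */
        return -ENOENT;
    }

With this patch, a lookup by one of the guest's own vcpus instead
sleeps on a per-vcpu wait queue inside __get_gfn_type_access() and
retries the lookup when woken, so those callers always see a valid
mfn.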

Cheers,

Tim.

# HG changeset patch
# User Tim Deegan <tim@xxxxxxx>
# Parent 01c1bcbc62224833304ede44187400f65e8a6b4c
[RFC] x86/mm: use wait queues for mem_paging

Use a wait queue to put a guest vcpu to sleep while the requested gfn is
in paging state. This adds missing p2m_mem_paging_populate() calls to
some callers of the new get_gfn* variants, which would otherwise crash
because they get an invalid mfn. It also fixes guest crashes caused by
unexpected returns from do_memory_op when copy_to/from_guest ran into a
paged gfn; those places will now always get a valid mfn.

This is based on an earlier RFC patch by Olaf Hering, but heavily
simplified (removing a per-gfn queue of waiting vcpus in favour of
a scan of all vcpus on page-in).

Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Signed-off-by: Tim Deegan <tim@xxxxxxx>

diff -r 01c1bcbc6222 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Thu Feb 16 08:48:23 2012 +0100
+++ b/xen/arch/x86/mm/p2m.c     Thu Feb 16 14:39:13 2012 +0000
@@ -160,13 +160,49 @@ mfn_t __get_gfn_type_access(struct p2m_d
     }
 
     /* For now only perform locking on hap domains */
-    if ( locked && (hap_enabled(p2m->domain)) )
+    locked = locked && hap_enabled(p2m->domain);
+
+#ifdef __x86_64__
+again:
+#endif
+    if ( locked )
         /* Grab the lock here, don't release until put_gfn */
         gfn_lock(p2m, gfn, 0);
 
     mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
 
 #ifdef __x86_64__
+    if ( p2m_is_paging(*t) && q != p2m_query 
+         && p2m->domain == current->domain ) 
+    {
+        if ( locked )
+            gfn_unlock(p2m, gfn, 0);
+
+        /* Ping the pager */
+        if ( *t == p2m_ram_paging_out || *t == p2m_ram_paged )
+            p2m_mem_paging_populate(p2m->domain, gfn);
+
+        /* Safety catch: it _should_ be safe to wait here but if it's not, 
+         * crash the VM, not the host */
+        if ( in_atomic() )
+        {
+            WARN();
+            domain_crash(p2m->domain);
+        }
+        else
+        {
+            current->arch.mem_paging_gfn = gfn;
+            wait_event(current->arch.mem_paging_wq, ({
+                        mfn = p2m->get_entry(p2m, gfn, t, a,
+                                             p2m_query, page_order);
+                        (*t != p2m_ram_paging_in);
+                    }));
+            goto again;
+        }
+    }
+#endif
+
+#ifdef __x86_64__
     if ( q == p2m_unshare && p2m_is_shared(*t) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
@@ -942,25 +978,25 @@ void p2m_mem_paging_drop_page(struct dom
 * This function needs to be called whenever gfn_to_mfn() returns any of the p2m
  * paging types because the gfn may not be backed by a mfn.
  *
- * The gfn can be in any of the paging states, but the pager needs only be
- * notified when the gfn is in the paging-out path (paging_out or paged).  This
- * function may be called more than once from several vcpus. If the vcpu belongs
- * to the guest, the vcpu must be stopped and the pager notified that the vcpu
- * was stopped. The pager needs to handle several requests for the same gfn.
+ * The gfn can be in any of the paging states, but the pager needs only
+ * be notified when the gfn is in the paging-out path (paging_out or
+ * paged).  This function may be called more than once from several
+ * vcpus.  The pager needs to handle several requests for the same gfn.
  *
- * If the gfn is not in the paging-out path and the vcpu does not belong to the
- * guest, nothing needs to be done and the function assumes that a request was
- * already sent to the pager. In this case the caller has to try again until the
- * gfn is fully paged in again.
+ * If the gfn is not in the paging-out path nothing needs to be done and
+ * the function assumes that a request was already sent to the pager.
+ * In this case the caller has to try again until the gfn is fully paged
+ * in again.
  */
 void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
 {
     struct vcpu *v = current;
-    mem_event_request_t req;
+    mem_event_request_t req = {0};
     p2m_type_t p2mt;
     p2m_access_t a;
     mfn_t mfn;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    int send_request = 0;
 
     /* We're paging. There should be a ring */
     int rc = mem_event_claim_slot(d, &d->mem_event->paging);
@@ -974,9 +1010,6 @@ void p2m_mem_paging_populate(struct doma
     else if ( rc < 0 )
         return;
 
-    memset(&req, 0, sizeof(req));
-    req.type = MEM_EVENT_TYPE_PAGING;
-
     /* Fix p2m mapping */
     gfn_lock(p2m, gfn, 0);
     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
@@ -986,26 +1019,29 @@ void p2m_mem_paging_populate(struct doma
         /* Evict will fail now, tag this request for pager */
         if ( p2mt == p2m_ram_paging_out )
             req.flags |= MEM_EVENT_FLAG_EVICT_FAIL;
-
-        set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a);
+        if ( p2mt == p2m_ram_paging_out && mfn_valid(mfn) && v->domain == d )
+            /* Short-cut back to paged-in state (but not for foreign 
+             * mappings, or the pager couldn't map it to page it out) */
+            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
+                          paging_mode_log_dirty(d) 
+                          ? p2m_ram_logdirty : p2m_ram_rw, a);
+        else
+        {
+            set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a);
+            send_request = 1;
+        }
     }
     gfn_unlock(p2m, gfn, 0);
 
-    /* Pause domain if request came from guest and gfn has paging type */
-    if ( p2m_is_paging(p2mt) && v->domain == d )
+    /* No need to inform pager if the gfn is not in the page-out path */
+    if ( !send_request ) 
     {
-        vcpu_pause_nosync(v);
-        req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
-    }
-    /* No need to inform pager if the gfn is not in the page-out path */
-    else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
-    {
-        /* gfn is already on its way back and vcpu is not paused */
         mem_event_cancel_slot(d, &d->mem_event->paging);
         return;
     }
 
     /* Send request to pager */
+    req.type = MEM_EVENT_TYPE_PAGING;
     req.gfn = gfn;
     req.p2mt = p2mt;
     req.vcpu_id = v->vcpu_id;
@@ -1122,6 +1158,7 @@ void p2m_mem_paging_resume(struct domain
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     mem_event_response_t rsp;
+    struct vcpu *v;
     p2m_type_t p2mt;
     p2m_access_t a;
     mfn_t mfn;
@@ -1147,9 +1184,10 @@ void p2m_mem_paging_resume(struct domain
             }
             gfn_unlock(p2m, rsp.gfn, 0);
         }
-        /* Unpause domain */
-        if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+        /* Wake any vcpus that were waiting for this GFN */
+        for_each_vcpu ( d, v )
+            if ( v->arch.mem_paging_gfn == rsp.gfn )
+                wake_up_all(&v->arch.mem_paging_wq);
     }
 }
 
diff -r 01c1bcbc6222 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Thu Feb 16 08:48:23 2012 +0100
+++ b/xen/include/asm-x86/domain.h      Thu Feb 16 14:39:13 2012 +0000
@@ -4,6 +4,7 @@
 #include <xen/config.h>
 #include <xen/mm.h>
 #include <xen/radix-tree.h>
+#include <xen/wait.h>
 #include <asm/hvm/vcpu.h>
 #include <asm/hvm/domain.h>
 #include <asm/e820.h>
@@ -491,6 +492,12 @@ struct arch_vcpu
     
     struct paging_vcpu paging;
 
+#ifdef CONFIG_X86_64
+    /* Mem-paging: this vcpu is waiting for a gfn to be paged in */
+    struct waitqueue_head mem_paging_wq;
+    unsigned long mem_paging_gfn;
+#endif
+
 #ifdef CONFIG_X86_32
     /* map_domain_page() mapping cache. */
     struct mapcache_vcpu mapcache;
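
A note on the wake-up side, since that's the main simplification from
Olaf's version: instead of keeping a per-gfn queue of waiting vcpus,
each vcpu records the one gfn it's blocked on and
p2m_mem_paging_resume() scans every vcpu of the domain.  That's
O(number of vcpus) per page-in, but a page-in already costs a ring
round-trip to the pager, so I don't expect the scan to show up.
Spurious wake-ups are also harmless, because wait_event() re-checks
its condition before returning.  Paraphrasing the loop in
xen/include/xen/wait.h from memory (not the real macro, just its
shape):

    /* A woken vcpu re-tests its condition, so waking one that was
     * waiting on a different gfn costs a p2m lookup and nothing
     * more.  prepare_to_wait() puts us on the queue before the
     * re-test, so a wake-up racing with the final check isn't
     * lost. */
    #define wait_event(wq, condition)    \
    do {                                 \
        if ( condition )                 \
            break;                       \
        for ( ; ; )                      \
        {                                \
            prepare_to_wait(&wq);        \
            if ( condition )             \
                break;                   \
            wait();                      \
        }                                \
        finish_wait(&wq);                \
    } while (0)

The condition in __get_gfn_type_access() re-reads the p2m entry with
p2m_query, so the re-check can't recurse into the sleep path; we only
take the 'again' path once the entry has left p2m_ram_paging_in.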
