[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] x86: Improve the efficiency of domain_relinquish_resources()



commit 0dfffe01d5681ede6a50c6b57131320d9f4a3361
Author:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
AuthorDate: Wed Feb 20 13:39:20 2019 +0000
Commit:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Fri Feb 22 14:10:42 2019 +0000

    x86: Improve the efficiency of domain_relinquish_resources()
    
    pci_release_devices() takes the global PCI lock.  Once pci_release_devices()
    has completed, it will be called redundantly each time paging_teardown() and
    vcpu_destroy_pagetables() continue.
    
    This is liable to be millions of times for a reasonably sized guest, and is 
a
    serialising bottleneck now that domain_kill() can be run concurrently on
    different domains.
    
    Instead of propagating the opencoding of the relinquish state machine, take
    the opportunity to clean it up.
    
    Leave a proper set of comments explaining that domain_relinquish_resources()
    implements a co-routine.  Introduce a documented PROGRESS() macro to avoid
    latent bugs such as the RELMEM_xen case, and make the new PROG_* states
    private to domain_relinquish_resources().
    
    Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Reviewed-by: Wei Liu <wei.liu2@xxxxxxxxxx>
    Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
    Release-acked-by: Juergen Gross <jgross@xxxxxxxx>
---
 xen/arch/x86/domain.c        | 71 +++++++++++++++++++++++++++++---------------
 xen/include/asm-x86/domain.h | 10 +------
 2 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 32dc4253ff..7a29435cb3 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -475,8 +475,6 @@ int arch_domain_create(struct domain *d,
     int rc;
 
     INIT_LIST_HEAD(&d->arch.pdev_list);
-
-    d->arch.relmem = RELMEM_not_started;
     INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
 
     spin_lock_init(&d->arch.e820_lock);
@@ -2020,18 +2018,51 @@ int domain_relinquish_resources(struct domain *d)
 
     BUG_ON(!cpumask_empty(d->dirty_cpumask));
 
-    switch ( d->arch.relmem )
+    /*
+     * This hypercall can take minutes of wallclock time to complete.  This
+     * logic implements a co-routine, stashing state in struct domain across
+     * hypercall continuation boundaries.
+     */
+    switch ( d->arch.rel_priv )
     {
-    case RELMEM_not_started:
+        /*
+         * Record the current progress.  Subsequent hypercall continuations
+         * will logically restart work from this point.
+         *
+         * PROGRESS() markers must not be in the middle of loops.  The loop
+         * variable isn't preserved across a continuation.
+         *
+         * To avoid redundant work, there should be a marker before each
+         * function which may return -ERESTART.
+         */
+#define PROGRESS(x)                                                     \
+        d->arch.rel_priv = PROG_ ## x; /* Fallthrough */ case PROG_ ## x
+
+        enum {
+            PROG_paging = 1,
+            PROG_vcpu_pagetables,
+            PROG_shared,
+            PROG_xen,
+            PROG_l4,
+            PROG_l3,
+            PROG_l2,
+            PROG_done,
+        };
+
+    case 0:
         ret = pci_release_devices(d);
         if ( ret )
             return ret;
 
+    PROGRESS(paging):
+
         /* Tear down paging-assistance stuff. */
         ret = paging_teardown(d);
         if ( ret )
             return ret;
 
+    PROGRESS(vcpu_pagetables):
+
         /* Drop the in-use references to page-table bases. */
         for_each_vcpu ( d, v )
         {
@@ -2058,10 +2089,7 @@ int domain_relinquish_resources(struct domain *d)
             d->arch.auto_unmask = 0;
         }
 
-        d->arch.relmem = RELMEM_shared;
-        /* fallthrough */
-
-    case RELMEM_shared:
+    PROGRESS(shared):
 
         if ( is_hvm_domain(d) )
         {
@@ -2072,45 +2100,40 @@ int domain_relinquish_resources(struct domain *d)
                 return ret;
         }
 
-        d->arch.relmem = RELMEM_xen;
-
         spin_lock(&d->page_alloc_lock);
         page_list_splice(&d->arch.relmem_list, &d->page_list);
         INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
         spin_unlock(&d->page_alloc_lock);
 
-        /* Fallthrough. Relinquish every page of memory. */
-    case RELMEM_xen:
+    PROGRESS(xen):
+
         ret = relinquish_memory(d, &d->xenpage_list, ~0UL);
         if ( ret )
             return ret;
-        d->arch.relmem = RELMEM_l4;
-        /* fallthrough */
 
-    case RELMEM_l4:
+    PROGRESS(l4):
+
         ret = relinquish_memory(d, &d->page_list, PGT_l4_page_table);
         if ( ret )
             return ret;
-        d->arch.relmem = RELMEM_l3;
-        /* fallthrough */
 
-    case RELMEM_l3:
+    PROGRESS(l3):
+
         ret = relinquish_memory(d, &d->page_list, PGT_l3_page_table);
         if ( ret )
             return ret;
-        d->arch.relmem = RELMEM_l2;
-        /* fallthrough */
 
-    case RELMEM_l2:
+    PROGRESS(l2):
+
         ret = relinquish_memory(d, &d->page_list, PGT_l2_page_table);
         if ( ret )
             return ret;
-        d->arch.relmem = RELMEM_done;
-        /* fallthrough */
 
-    case RELMEM_done:
+    PROGRESS(done):
         break;
 
+#undef PROGRESS
+
     default:
         BUG();
     }
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 277f99f633..58ade0b48e 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -314,15 +314,7 @@ struct arch_domain
     int page_alloc_unlock_level;
 
     /* Continuable domain_relinquish_resources(). */
-    enum {
-        RELMEM_not_started,
-        RELMEM_shared,
-        RELMEM_xen,
-        RELMEM_l4,
-        RELMEM_l3,
-        RELMEM_l2,
-        RELMEM_done,
-    } relmem;
+    unsigned int rel_priv;
     struct page_list_head relmem_list;
 
     const struct arch_csw {
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.