[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 6/9] x86/np2m: send flush IPIs only when a vcpu is actively using an np2m

To: <xen-devel@xxxxxxxxxxxxx>
From: Sergey Dyasli <sergey.dyasli@xxxxxxxxxx>
Date: Tue, 3 Oct 2017 16:21:01 +0100
Cc: Sergey Dyasli <sergey.dyasli@xxxxxxxxxx>, Kevin Tian <kevin.tian@xxxxxxxxx>, Jun Nakajima <jun.nakajima@xxxxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxxxxx>, Andrew Cooper <Andrew.Cooper3@xxxxxxxxxx>, Tim Deegan <tim@xxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxx>, Jan Beulich <jbeulich@xxxxxxxx>, Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>, Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
Delivery-date: Tue, 03 Oct 2017 15:21:22 +0000
List-id: Xen developer discussion <xen-devel.lists.xen.org>

Flush IPIs are sent to all cpus in an np2m's dirty_cpumask when the np2m is
updated.  This mask, however, is far too broad.  A pcpu's bit is set in
the cpumask when a vcpu runs on that pcpu, but is only cleared when a
flush happens.  This means that the IPI includes the current pcpu of
vcpus that are not currently running, and also includes any pcpu that
has ever had a vcpu use this p2m since the last flush (which in turn
will cause spurious invalidations if a different vcpu is using an np2m).

Avoid these IPIs by keeping closer track of where an np2m is being used,
and when a vcpu needs to be flushed:

- On schedule-out, clear v->processor in p2m->dirty_cpumask
- Add a 'generation' counter to the p2m and nestedvcpu structs to
  detect changes that would require re-loads on re-entry
- On schedule-in or p2m change:
  - Set v->processor in p2m->dirty_cpumask
  - flush the vcpu's nested p2m pointer (and update nv->generation) if
    the generation changed

Signed-off-by: Sergey Dyasli <sergey.dyasli@xxxxxxxxxx>
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxx>
---
v2 --> v3:
- current pointer is now calculated only once in np2m_schedule()
- Replaced "shadow p2m" with "np2m" for consistency in commit message
---
 xen/arch/x86/domain.c          |  2 ++
 xen/arch/x86/hvm/nestedhvm.c   |  1 +
 xen/arch/x86/hvm/vmx/vvmx.c    |  3 +++
 xen/arch/x86/mm/p2m.c          | 56 +++++++++++++++++++++++++++++++++++++++++-
 xen/include/asm-x86/hvm/vcpu.h |  1 +
 xen/include/asm-x86/p2m.h      |  6 +++++
 6 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 466a1a2fac..35ea0d2418 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1668,6 +1668,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
     {
         _update_runstate_area(prev);
         vpmu_switch_from(prev);
+        np2m_schedule(NP2M_SCHEDLE_OUT);
     }
 
     if ( is_hvm_domain(prevd) && !list_empty(&prev->arch.hvm_vcpu.tm_list) )
@@ -1716,6 +1717,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
 
         /* Must be done with interrupts enabled */
         vpmu_switch_to(next);
+        np2m_schedule(NP2M_SCHEDLE_IN);
     }
 
     /* Ensure that the vcpu has an up-to-date time base. */
diff --git a/xen/arch/x86/hvm/nestedhvm.c b/xen/arch/x86/hvm/nestedhvm.c
index 74a464d162..ab50b2ab98 100644
--- a/xen/arch/x86/hvm/nestedhvm.c
+++ b/xen/arch/x86/hvm/nestedhvm.c
@@ -57,6 +57,7 @@ nestedhvm_vcpu_reset(struct vcpu *v)
     nv->nv_flushp2m = 0;
     nv->nv_p2m = NULL;
     nv->stale_np2m = false;
+    nv->np2m_generation = 0;
 
     hvm_asid_flush_vcpu_asid(&nv->nv_n2asid);
 
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 3f596dc698..198ca72f2a 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1367,6 +1367,9 @@ static void virtual_vmexit(struct cpu_user_regs *regs)
          !(v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
         shadow_to_vvmcs_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields);
 
+    /* This will clear current pCPU bit in p2m->dirty_cpumask */
+    np2m_schedule(NP2M_SCHEDLE_OUT);
+
     vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n1vmcx_pa);
 
     nestedhvm_vcpu_exit_guestmode(v);
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index fd48a3b9db..3c62292165 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -73,6 +73,7 @@ static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
     p2m->p2m_class = p2m_host;
 
     p2m->np2m_base = P2M_BASE_EADDR;
+    p2m->np2m_generation = 0;
 
     for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i )
         p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN);
@@ -1735,6 +1736,7 @@ p2m_flush_table_locked(struct p2m_domain *p2m)
 
     /* This is no longer a valid nested p2m for any address space */
     p2m->np2m_base = P2M_BASE_EADDR;
+    p2m->np2m_generation++;
 
     /* Make sure nobody else is using this p2m table */
     nestedhvm_vmcx_flushtlb(p2m);
@@ -1809,6 +1811,7 @@ static void assign_np2m(struct vcpu *v, struct p2m_domain *p2m)
 
     nv->nv_flushp2m = 0;
     nv->nv_p2m = p2m;
+    nv->np2m_generation = p2m->np2m_generation;
     cpumask_set_cpu(v->processor, p2m->dirty_cpumask);
 }
 
@@ -1840,7 +1843,9 @@ p2m_get_nestedp2m_locked(struct vcpu *v)
         p2m_lock(p2m);
         if ( p2m->np2m_base == np2m_base || p2m->np2m_base == P2M_BASE_EADDR )
         {
-            if ( p2m->np2m_base == P2M_BASE_EADDR )
+            /* Check if np2m was flushed just before the lock */
+            if ( p2m->np2m_base == P2M_BASE_EADDR ||
+                 nv->np2m_generation != p2m->np2m_generation )
                 nvcpu_flush(v);
             p2m->np2m_base = np2m_base;
             assign_np2m(v, p2m);
@@ -1848,6 +1853,11 @@ p2m_get_nestedp2m_locked(struct vcpu *v)
 
             return p2m;
         }
+        else
+        {
+            /* vCPU is switching from some other valid np2m */
+            cpumask_clear_cpu(v->processor, p2m->dirty_cpumask);
+        }
         p2m_unlock(p2m);
     }
 
@@ -1881,6 +1891,50 @@ p2m_get_p2m(struct vcpu *v)
     return p2m_get_nestedp2m(v);
 }
 
+void np2m_schedule(int dir)
+{
+    struct vcpu *curr = current;
+    struct nestedvcpu *nv = &vcpu_nestedhvm(curr);
+    struct p2m_domain *p2m;
+
+    ASSERT(dir == NP2M_SCHEDLE_IN || dir == NP2M_SCHEDLE_OUT);
+
+    if ( !nestedhvm_enabled(curr->domain) ||
+         !nestedhvm_vcpu_in_guestmode(curr) ||
+         !nestedhvm_paging_mode_hap(curr) )
+        return;
+
+    p2m = nv->nv_p2m;
+    if ( p2m )
+    {
+        bool np2m_valid;
+
+        p2m_lock(p2m);
+        np2m_valid = p2m->np2m_base == nhvm_vcpu_p2m_base(curr) &&
+                     nv->np2m_generation == p2m->np2m_generation;
+        if ( dir == NP2M_SCHEDLE_OUT && np2m_valid )
+        {
+            /*
+             * The np2m is up to date but this vCPU will no longer use it,
+             * which means there are no reasons to send a flush IPI.
+             */
+            cpumask_clear_cpu(curr->processor, p2m->dirty_cpumask);
+        }
+        else if ( dir == NP2M_SCHEDLE_IN )
+        {
+            if ( !np2m_valid )
+            {
+                /* This vCPU's np2m was flushed while it was not runnable */
+                hvm_asid_flush_core();
+                vcpu_nestedhvm(curr).nv_p2m = NULL;
+            }
+            else
+                cpumask_set_cpu(curr->processor, p2m->dirty_cpumask);
+        }
+        p2m_unlock(p2m);
+    }
+}
+
 unsigned long paging_gva_to_gfn(struct vcpu *v,
                                 unsigned long va,
                                 uint32_t *pfec)
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index 27330242e3..d93166fb92 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -116,6 +116,7 @@ struct nestedvcpu {
     bool_t nv_flushp2m; /* True, when p2m table must be flushed */
     struct p2m_domain *nv_p2m; /* used p2m table for this vcpu */
     bool stale_np2m; /* True when p2m_base in VMCx02 is no longer valid */
+    uint64_t np2m_generation;
 
     struct hvm_vcpu_asid nv_n2asid;
 
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 9a757792ee..182463b247 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -209,6 +209,7 @@ struct p2m_domain {
      * to set it to any other value. */
 #define P2M_BASE_EADDR     (~0ULL)
     uint64_t           np2m_base;
+    uint64_t           np2m_generation;
 
     /* Nested p2ms: linked list of n2pms allocated to this domain. 
      * The host p2m hasolds the head of the list and the np2ms are 
@@ -371,6 +372,11 @@ struct p2m_domain *p2m_get_nestedp2m_locked(struct vcpu *v);
  */
 struct p2m_domain *p2m_get_p2m(struct vcpu *v);
 
+#define NP2M_SCHEDLE_IN  0
+#define NP2M_SCHEDLE_OUT 1
+
+void np2m_schedule(int dir);
+
 static inline bool_t p2m_is_hostp2m(const struct p2m_domain *p2m)
 {
     return p2m->p2m_class == p2m_host;
-- 
2.11.0


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

Follow-Ups:
- Re: [Xen-devel] [PATCH v3 6/9] x86/np2m: send flush IPIs only when a vcpu is actively using an np2m
  - From: George Dunlap

References:
- [Xen-devel] [PATCH v3 0/9] Nested p2m: allow sharing between vCPUs
  - From: Sergey Dyasli

Prev by Date: [Xen-devel] [PATCH v3 4/9] x86/np2m: simplify nestedhvm_hap_nested_page_fault()
Next by Date: [Xen-devel] [PATCH v3 8/9] x86/np2m: refactor p2m_get_nestedp2m_locked()
Previous by thread: Re: [Xen-devel] [PATCH v3 4/9] x86/np2m: simplify nestedhvm_hap_nested_page_fault()
Next by thread: Re: [Xen-devel] [PATCH v3 6/9] x86/np2m: send flush IPIs only when a vcpu is actively using an np2m
Index(es):
- Date
- Thread

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.