Xen project Mailing List

[Xen-devel] [PATCH 6/9] x86/np2m: Send flush IPIs only when a vcpu is actively using a shadow p2m

From: George Dunlap <george.dunlap@xxxxxxxxxx>

Date: Fri, 29 Sep 2017 16:01:41 +0100

Cc: Sergey Dyasli <sergey.dyasli@xxxxxxxxxx>, Kevin Tian <kevin.tian@xxxxxxxxx>, Jan Beulich <jbeulich@xxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxx>, Jun Nakajima <jun.nakajima@xxxxxxxxx>

Delivery-date: Fri, 29 Sep 2017 15:02:13 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

Flush IPIs are sent to all cpus in a shadow p2m's dirty_cpumask when updated. This mask, however, is far too broad. A pcpu's bit is set in the cpumask when a vcpu runs on that pcpu, but is only cleared when a flush happens. This means that the IPI includes the current pcpu of vcpus that are not currently running, and also includes any pcpu that has ever had a vcpu use this p2m since the last flush (which in turn will cause spurious invalidations if a different vcpu is using a shadow p2m). Avoid these IPIs by keeping closer track of where a p2m is being used, and when a vcpu needs to be flushed: - On schedule-out, clear v->processor in p2m->dirty_cpumask - Add a 'generation' counter to the p2m and nestedvcpu structs to detect changes that would require re-loads on re-entry - On schedule-in or p2m change: - Set v->processor in p2m->dirty_cpumask - flush the vcpu's nested p2m pointer (and update nv->generation) if the generation changed Signed-off-by: Sergey Dyasli <sergey.dyasli@xxxxxxxxxx> Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxx> --- Changes since v1: - Combine patches 5 and 8, and the scheduling bits of patch 11 ("x86/np2m: add np2m_generation", "x86/np2m: add np2m_schedule()", and "x86/np2m: implement sharing of np2m between vCPUs") - Reword commit message CC: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> CC: Jan Beulich <jbeulich@xxxxxxxx> CC: Jun Nakajima <jun.nakajima@xxxxxxxxx> CC: Kevin Tian <kevin.tian@xxxxxxxxx> --- xen/arch/x86/domain.c | 2 ++ xen/arch/x86/hvm/nestedhvm.c | 1 + xen/arch/x86/hvm/vmx/vvmx.c | 3 +++ xen/arch/x86/mm/p2m.c | 55 +++++++++++++++++++++++++++++++++++++++++- xen/include/asm-x86/hvm/vcpu.h | 1 + xen/include/asm-x86/p2m.h | 6 +++++ 6 files changed, 67 insertions(+), 1 deletion(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 466a1a2fac..35ea0d2418 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1668,6 +1668,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next) { 
_update_runstate_area(prev); vpmu_switch_from(prev); + np2m_schedule(NP2M_SCHEDLE_OUT); } if ( is_hvm_domain(prevd) && !list_empty(&prev->arch.hvm_vcpu.tm_list) ) @@ -1716,6 +1717,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next) /* Must be done with interrupts enabled */ vpmu_switch_to(next); + np2m_schedule(NP2M_SCHEDLE_IN); } /* Ensure that the vcpu has an up-to-date time base. */ diff --git a/xen/arch/x86/hvm/nestedhvm.c b/xen/arch/x86/hvm/nestedhvm.c index 74a464d162..ab50b2ab98 100644 --- a/xen/arch/x86/hvm/nestedhvm.c +++ b/xen/arch/x86/hvm/nestedhvm.c @@ -57,6 +57,7 @@ nestedhvm_vcpu_reset(struct vcpu *v) nv->nv_flushp2m = 0; nv->nv_p2m = NULL; nv->stale_np2m = false; + nv->np2m_generation = 0; hvm_asid_flush_vcpu_asid(&nv->nv_n2asid); diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c index 48e37158af..a6a558b460 100644 --- a/xen/arch/x86/hvm/vmx/vvmx.c +++ b/xen/arch/x86/hvm/vmx/vvmx.c @@ -1367,6 +1367,9 @@ static void virtual_vmexit(struct cpu_user_regs *regs) !(v->arch.hvm_vcpu.guest_efer & EFER_LMA) ) shadow_to_vvmcs_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields); + /* This will clear current pCPU bit in p2m->dirty_cpumask */ + np2m_schedule(NP2M_SCHEDLE_OUT); + vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n1vmcx_pa); nestedhvm_vcpu_exit_guestmode(v); diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index fd48a3b9db..3c6c486c00 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -73,6 +73,7 @@ static int p2m_initialise(struct domain *d, struct p2m_domain *p2m) p2m->p2m_class = p2m_host; p2m->np2m_base = P2M_BASE_EADDR; + p2m->np2m_generation = 0; for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i ) p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN); @@ -1735,6 +1736,7 @@ p2m_flush_table_locked(struct p2m_domain *p2m) /* This is no longer a valid nested p2m for any address space */ p2m->np2m_base = P2M_BASE_EADDR; + p2m->np2m_generation++; /* Make sure nobody else is using this p2m table */ 
nestedhvm_vmcx_flushtlb(p2m); @@ -1809,6 +1811,7 @@ static void assign_np2m(struct vcpu *v, struct p2m_domain *p2m) nv->nv_flushp2m = 0; nv->nv_p2m = p2m; + nv->np2m_generation = p2m->np2m_generation; cpumask_set_cpu(v->processor, p2m->dirty_cpumask); } @@ -1840,7 +1843,9 @@ p2m_get_nestedp2m_locked(struct vcpu *v) p2m_lock(p2m); if ( p2m->np2m_base == np2m_base || p2m->np2m_base == P2M_BASE_EADDR ) { - if ( p2m->np2m_base == P2M_BASE_EADDR ) + /* Check if np2m was flushed just before the lock */ + if ( p2m->np2m_base == P2M_BASE_EADDR || + nv->np2m_generation != p2m->np2m_generation ) nvcpu_flush(v); p2m->np2m_base = np2m_base; assign_np2m(v, p2m); @@ -1848,6 +1853,11 @@ p2m_get_nestedp2m_locked(struct vcpu *v) return p2m; } + else + { + /* vCPU is switching from some other valid np2m */ + cpumask_clear_cpu(v->processor, p2m->dirty_cpumask); + } p2m_unlock(p2m); } @@ -1881,6 +1891,49 @@ p2m_get_p2m(struct vcpu *v) return p2m_get_nestedp2m(v); } +void np2m_schedule(int dir) +{ + struct nestedvcpu *nv = &vcpu_nestedhvm(current); + struct p2m_domain *p2m; + + ASSERT(dir == NP2M_SCHEDLE_IN || dir == NP2M_SCHEDLE_OUT); + + if ( !nestedhvm_enabled(current->domain) || + !nestedhvm_vcpu_in_guestmode(current) || + !nestedhvm_paging_mode_hap(current) ) + return; + + p2m = nv->nv_p2m; + if ( p2m ) + { + bool np2m_valid; + + p2m_lock(p2m); + np2m_valid = p2m->np2m_base == nhvm_vcpu_p2m_base(current) && + nv->np2m_generation == p2m->np2m_generation; + if ( dir == NP2M_SCHEDLE_OUT && np2m_valid ) + { + /* + * The np2m is up to date but this vCPU will no longer use it, + * which means there are no reasons to send a flush IPI. 
+ */ + cpumask_clear_cpu(current->processor, p2m->dirty_cpumask); + } + else if ( dir == NP2M_SCHEDLE_IN ) + { + if ( !np2m_valid ) + { + /* This vCPU's np2m was flushed while it was not runnable */ + hvm_asid_flush_core(); + vcpu_nestedhvm(current).nv_p2m = NULL; + } + else + cpumask_set_cpu(current->processor, p2m->dirty_cpumask); + } + p2m_unlock(p2m); + } +} + unsigned long paging_gva_to_gfn(struct vcpu *v, unsigned long va, uint32_t *pfec) diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h index 5cfa4b4aa4..afe5ffc6b3 100644 --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -116,6 +116,7 @@ struct nestedvcpu { bool_t nv_flushp2m; /* True, when p2m table must be flushed */ struct p2m_domain *nv_p2m; /* used p2m table for this vcpu */ bool stale_np2m; /* True when p2m_base in VMCX02 is no longer valid */ + uint64_t np2m_generation; struct hvm_vcpu_asid nv_n2asid; diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h index 4a1c10c130..8d4aa8c6bf 100644 --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -209,6 +209,7 @@ struct p2m_domain { * to set it to any other value. */ #define P2M_BASE_EADDR (~0ULL) uint64_t np2m_base; + uint64_t np2m_generation; /* Nested p2ms: linked list of n2pms allocated to this domain. * The host p2m hasolds the head of the list and the np2ms are @@ -371,6 +372,11 @@ struct p2m_domain *p2m_get_nestedp2m_locked(struct vcpu *v); */ struct p2m_domain *p2m_get_p2m(struct vcpu *v); +#define NP2M_SCHEDLE_IN 0 +#define NP2M_SCHEDLE_OUT 1 + +void np2m_schedule(int dir); + static inline bool_t p2m_is_hostp2m(const struct p2m_domain *p2m) { return p2m->p2m_class == p2m_host; -- 2.14.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.