Xen project Mailing List

Re: [Xen-devel] [PATCH v5 4/7] xen/x86: use invpcid for flushing the TLB

To: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx

From: Juergen Gross <jgross@xxxxxxxx>

Date: Fri, 6 Apr 2018 15:48:08 +0200

Delivery-date: Fri, 06 Apr 2018 13:48:23 +0000

List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 06/04/18 15:40, Andrew Cooper wrote: > On 06/04/18 08:52, Juergen Gross wrote: >> If possible use the INVPCID instruction for flushing the TLB instead of >> toggling cr4.pge for that purpose. >> >> While at it remove the dependency on cr4.pge being required for mtrr >> loading, as this will be required later anyway. >> >> Add a command line option "invpcid" for controlling the use of >> INVPCID (default to true). >> >> Signed-off-by: Juergen Gross <jgross@xxxxxxxx> >> --- >> V5: >> - use pre_flush() as an initializer in do_tlb_flush() (Jan Beulich) >> - introduce boolean use_invpcid instead of clearing X86_FEATURE_INVPCID >> (Jan Beulich) >> >> V4: >> - option "invpcid" instead of "noinvpcid" (Jan Beulich) >> >> V3: >> - new patch >> --- >> docs/misc/xen-command-line.markdown | 10 ++++++++++ >> xen/arch/x86/cpu/mtrr/generic.c | 37 >> ++++++++++++++++++++++++++----------- >> xen/arch/x86/flushtlb.c | 29 +++++++++++++++++++---------- >> xen/arch/x86/setup.c | 8 ++++++++ >> xen/include/asm-x86/invpcid.h | 2 ++ >> 5 files changed, 65 insertions(+), 21 deletions(-) >> >> diff --git a/docs/misc/xen-command-line.markdown >> b/docs/misc/xen-command-line.markdown >> index 79be9a6ba5..5f6ae654ad 100644 >> --- a/docs/misc/xen-command-line.markdown >> +++ b/docs/misc/xen-command-line.markdown >> @@ -1380,6 +1380,16 @@ Because responsibility for APIC setup is shared >> between Xen and the >> domain 0 kernel this option is automatically propagated to the domain >> 0 command line. >> >> +### invpcid (x86) >> +> `= <boolean>` >> + >> +> Default: `true` >> + >> +Control using the INVPCID instruction for flushing TLB entries. >> +This should only be used in case of known issues on the current platform >> +with that instruction. Disabling INVPCID will normally result in a slightly >> +degraded performance. > > How about: > > By default, Xen will use the INVPCID instruction for TLB management if > it is available. This option can be used to cause Xen to fall back to > older mechanisms, which are generally slower. > > ? Works for me. > We are not aware of any systems with INVPCID problems, but "for testing > purposes" is also a valid reason to use this option. Okay, I'll change it. > > >> + >> ### noirqbalance >> > `= <boolean>` >> >> diff --git a/xen/arch/x86/cpu/mtrr/generic.c >> b/xen/arch/x86/cpu/mtrr/generic.c >> index e9c0e5e059..705855e753 100644 >> --- a/xen/arch/x86/cpu/mtrr/generic.c >> +++ b/xen/arch/x86/cpu/mtrr/generic.c >> @@ -5,6 +5,7 @@ >> #include <xen/mm.h> >> #include <xen/stdbool.h> >> #include <asm/flushtlb.h> >> +#include <asm/invpcid.h> >> #include <asm/io.h> >> #include <asm/mtrr.h> >> #include <asm/msr.h> >> @@ -400,8 +401,10 @@ static DEFINE_SPINLOCK(set_atomicity_lock); >> * has been called. >> */ >> >> -static void prepare_set(void) >> +static bool prepare_set(void) >> { >> + unsigned long cr4; >> + >> /* Note that this is not ideal, since the cache is only >> flushed/disabled >> for this CPU while the MTRRs are changed, but changing this requires >> more invasive changes to the way the kernel boots */ >> @@ -412,18 +415,24 @@ static void prepare_set(void) >> write_cr0(read_cr0() | X86_CR0_CD); >> wbinvd(); >> >> - /* TLB flushing here relies on Xen always using CR4.PGE. */ >> - BUILD_BUG_ON(!(XEN_MINIMAL_CR4 & X86_CR4_PGE)); >> - write_cr4(read_cr4() & ~X86_CR4_PGE); >> + cr4 = read_cr4(); >> + if (cr4 & X86_CR4_PGE) >> + write_cr4(cr4 & ~X86_CR4_PGE); >> + else if (use_invpcid) >> + invpcid_flush_all(); >> + else >> + asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" ); > > We could really do with gaining a static inline wrapper for write_cr3(), > because at the very least, the memory clobber going missing would be a > BadThing(tm). Sadly that name is already taken in Xen, and behaves > differently to (all?) other OS's example. > > There are currently only 3 callers of write_cr3(). How about a prereq > patch renaming write_cr3() to switch_cr3() (as switch_cr3() logically > implies the TLB clock activities), and a new thin write_cr3() wrapper? Sure. > >> >> /* Save MTRR state */ >> rdmsrl(MSR_MTRRdefType, deftype); >> >> /* Disable MTRRs, and set the default type to uncached */ >> mtrr_wrmsr(MSR_MTRRdefType, deftype & ~0xcff); >> + >> + return cr4 & X86_CR4_PGE; >> } >> >> -static void post_set(void) >> +static void post_set(bool pge) >> { >> /* Intel (P6) standard MTRRs */ >> mtrr_wrmsr(MSR_MTRRdefType, deftype); >> @@ -432,7 +441,12 @@ static void post_set(void) >> write_cr0(read_cr0() & ~X86_CR0_CD); >> >> /* Reenable CR4.PGE (also flushes the TLB) */ >> - write_cr4(read_cr4() | X86_CR4_PGE); >> + if (pge) >> + write_cr4(read_cr4() | X86_CR4_PGE); >> + else if (use_invpcid) >> + invpcid_flush_all(); >> + else >> + asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" ); >> >> spin_unlock(&set_atomicity_lock); >> } >> @@ -441,14 +455,15 @@ static void generic_set_all(void) >> { >> unsigned long mask, count; >> unsigned long flags; >> + bool pge; >> >> local_irq_save(flags); >> - prepare_set(); >> + pge = prepare_set(); >> >> /* Actually set the state */ >> mask = set_mtrr_state(); >> >> - post_set(); >> + post_set(pge); >> local_irq_restore(flags); >> >> /* Use the atomic bitops to update the global mask */ >> @@ -457,7 +472,6 @@ static void generic_set_all(void) >> set_bit(count, &smp_changes_mask); >> mask >>= 1; >> } >> - >> } >> >> static void generic_set_mtrr(unsigned int reg, unsigned long base, >> @@ -474,11 +488,12 @@ static void generic_set_mtrr(unsigned int reg, >> unsigned long base, >> { >> unsigned long flags; >> struct mtrr_var_range *vr; >> + bool pge; >> >> vr = &mtrr_state.var_ranges[reg]; >> >> local_irq_save(flags); >> - prepare_set(); >> + pge = prepare_set(); >> >> if (size == 0) { >> /* The invalid bit is kept in the mask, so we simply clear the >> @@ -499,7 +514,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned >> long base, >> mtrr_wrmsr(MSR_IA32_MTRR_PHYSMASK(reg), vr->mask); >> } >> >> - post_set(); >> + post_set(pge); >> local_irq_restore(flags); >> } >> >> diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c >> index 38cedf3b22..f793b70696 100644 >> --- a/xen/arch/x86/flushtlb.c >> +++ b/xen/arch/x86/flushtlb.c >> @@ -10,6 +10,7 @@ >> #include <xen/sched.h> >> #include <xen/softirq.h> >> #include <asm/flushtlb.h> >> +#include <asm/invpcid.h> >> #include <asm/page.h> >> >> /* Debug builds: Wrap frequently to stress-test the wrap logic. */ >> @@ -71,6 +72,23 @@ static void post_flush(u32 t) >> this_cpu(tlbflush_time) = t; >> } >> >> +static void do_tlb_flush(void) >> +{ >> + u32 t = pre_flush(); >> + >> + if ( use_invpcid ) >> + invpcid_flush_all(); >> + else >> + { >> + unsigned long cr4 = read_cr4(); >> + >> + write_cr4(cr4 ^ X86_CR4_PGE); >> + write_cr4(cr4); >> + } >> + >> + post_flush(t); >> +} >> + >> void write_cr3(unsigned long cr3) >> { >> unsigned long flags, cr4; >> @@ -118,16 +136,7 @@ unsigned int flush_area_local(const void *va, unsigned >> int flags) >> : : "m" (*(const char *)(va)) : "memory" ); >> } >> else >> - { >> - u32 t = pre_flush(); >> - unsigned long cr4 = read_cr4(); >> - >> - write_cr4(cr4 & ~X86_CR4_PGE); >> - barrier(); >> - write_cr4(cr4); >> - >> - post_flush(t); >> - } >> + do_tlb_flush(); >> } >> >> if ( flags & FLUSH_CACHE ) >> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c >> index e83d3b4f07..0a27402e4d 100644 >> --- a/xen/arch/x86/setup.c >> +++ b/xen/arch/x86/setup.c >> @@ -63,6 +63,11 @@ boolean_param("nosmp", opt_nosmp); >> static unsigned int __initdata max_cpus; >> integer_param("maxcpus", max_cpus); >> >> +/* opt_invpcid: If false, don't use INVPCID instruction even if available. >> */ >> +static bool __initdata opt_invpcid = true; >> +boolean_param("invpcid", opt_invpcid); >> +bool use_invpcid; > > __read_mostly. Yes. Juergen _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.