|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v5 4/7] xen/x86: use invpcid for flushing the TLB
On 06/04/18 15:40, Andrew Cooper wrote:
> On 06/04/18 08:52, Juergen Gross wrote:
>> If possible use the INVPCID instruction for flushing the TLB instead of
>> toggling cr4.pge for that purpose.
>>
>> While at it remove the dependency on cr4.pge being required for mtrr
>> loading, as this will be required later anyway.
>>
>> Add a command line option "invpcid" for controlling the use of
>> INVPCID (default to true).
>>
>> Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
>> ---
>> V5:
>> - use pre_flush() as an initializer in do_tlb_flush() (Jan Beulich)
>> - introduce boolean use_invpcid instead of clearing X86_FEATURE_INVPCID
>> (Jan Beulich)
>>
>> V4:
>> - option "invpcid" instead of "noinvpcid" (Jan Beulich)
>>
>> V3:
>> - new patch
>> ---
>> docs/misc/xen-command-line.markdown | 10 ++++++++++
>> xen/arch/x86/cpu/mtrr/generic.c | 37
>> ++++++++++++++++++++++++++-----------
>> xen/arch/x86/flushtlb.c | 29 +++++++++++++++++++----------
>> xen/arch/x86/setup.c | 8 ++++++++
>> xen/include/asm-x86/invpcid.h | 2 ++
>> 5 files changed, 65 insertions(+), 21 deletions(-)
>>
>> diff --git a/docs/misc/xen-command-line.markdown
>> b/docs/misc/xen-command-line.markdown
>> index 79be9a6ba5..5f6ae654ad 100644
>> --- a/docs/misc/xen-command-line.markdown
>> +++ b/docs/misc/xen-command-line.markdown
>> @@ -1380,6 +1380,16 @@ Because responsibility for APIC setup is shared
>> between Xen and the
>> domain 0 kernel this option is automatically propagated to the domain
>> 0 command line.
>>
>> +### invpcid (x86)
>> +> `= <boolean>`
>> +
>> +> Default: `true`
>> +
>> +Control using the INVPCID instruction for flushing TLB entries.
>> +This should only be used in case of known issues on the current platform
>> +with that instruction. Disabling INVPCID will normally result in slightly
>> +degraded performance.
>
> How about:
>
> By default, Xen will use the INVPCID instruction for TLB management if
> it is available. This option can be used to cause Xen to fall back to
> older mechanisms, which are generally slower.
>
> ?
Works for me.
> We are not aware of any systems with INVPCID problems, but "for testing
> purposes" is also a valid reason to use this option.
Okay, I'll change it.
>
>
>> +
>> ### noirqbalance
>> > `= <boolean>`
>>
>> diff --git a/xen/arch/x86/cpu/mtrr/generic.c
>> b/xen/arch/x86/cpu/mtrr/generic.c
>> index e9c0e5e059..705855e753 100644
>> --- a/xen/arch/x86/cpu/mtrr/generic.c
>> +++ b/xen/arch/x86/cpu/mtrr/generic.c
>> @@ -5,6 +5,7 @@
>> #include <xen/mm.h>
>> #include <xen/stdbool.h>
>> #include <asm/flushtlb.h>
>> +#include <asm/invpcid.h>
>> #include <asm/io.h>
>> #include <asm/mtrr.h>
>> #include <asm/msr.h>
>> @@ -400,8 +401,10 @@ static DEFINE_SPINLOCK(set_atomicity_lock);
>> * has been called.
>> */
>>
>> -static void prepare_set(void)
>> +static bool prepare_set(void)
>> {
>> + unsigned long cr4;
>> +
>> /* Note that this is not ideal, since the cache is only
>> flushed/disabled
>> for this CPU while the MTRRs are changed, but changing this requires
>> more invasive changes to the way the kernel boots */
>> @@ -412,18 +415,24 @@ static void prepare_set(void)
>> write_cr0(read_cr0() | X86_CR0_CD);
>> wbinvd();
>>
>> - /* TLB flushing here relies on Xen always using CR4.PGE. */
>> - BUILD_BUG_ON(!(XEN_MINIMAL_CR4 & X86_CR4_PGE));
>> - write_cr4(read_cr4() & ~X86_CR4_PGE);
>> + cr4 = read_cr4();
>> + if (cr4 & X86_CR4_PGE)
>> + write_cr4(cr4 & ~X86_CR4_PGE);
>> + else if (use_invpcid)
>> + invpcid_flush_all();
>> + else
>> + asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
>
> We could really do with gaining a static inline wrapper for write_cr3(),
> because at the very least, the memory clobber going missing would be a
> BadThing(tm). Sadly that name is already taken in Xen, and behaves
> differently to (all?) other OSes' examples.
>
> There are currently only 3 callers of write_cr3(). How about a prereq
> patch renaming write_cr3() to switch_cr3() (as switch_cr3() logically
> implies the TLB clock activities), and a new thin write_cr3() wrapper?
Sure.
>
>>
>> /* Save MTRR state */
>> rdmsrl(MSR_MTRRdefType, deftype);
>>
>> /* Disable MTRRs, and set the default type to uncached */
>> mtrr_wrmsr(MSR_MTRRdefType, deftype & ~0xcff);
>> +
>> + return cr4 & X86_CR4_PGE;
>> }
>>
>> -static void post_set(void)
>> +static void post_set(bool pge)
>> {
>> /* Intel (P6) standard MTRRs */
>> mtrr_wrmsr(MSR_MTRRdefType, deftype);
>> @@ -432,7 +441,12 @@ static void post_set(void)
>> write_cr0(read_cr0() & ~X86_CR0_CD);
>>
>> /* Reenable CR4.PGE (also flushes the TLB) */
>> - write_cr4(read_cr4() | X86_CR4_PGE);
>> + if (pge)
>> + write_cr4(read_cr4() | X86_CR4_PGE);
>> + else if (use_invpcid)
>> + invpcid_flush_all();
>> + else
>> + asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
>>
>> spin_unlock(&set_atomicity_lock);
>> }
>> @@ -441,14 +455,15 @@ static void generic_set_all(void)
>> {
>> unsigned long mask, count;
>> unsigned long flags;
>> + bool pge;
>>
>> local_irq_save(flags);
>> - prepare_set();
>> + pge = prepare_set();
>>
>> /* Actually set the state */
>> mask = set_mtrr_state();
>>
>> - post_set();
>> + post_set(pge);
>> local_irq_restore(flags);
>>
>> /* Use the atomic bitops to update the global mask */
>> @@ -457,7 +472,6 @@ static void generic_set_all(void)
>> set_bit(count, &smp_changes_mask);
>> mask >>= 1;
>> }
>> -
>> }
>>
>> static void generic_set_mtrr(unsigned int reg, unsigned long base,
>> @@ -474,11 +488,12 @@ static void generic_set_mtrr(unsigned int reg,
>> unsigned long base,
>> {
>> unsigned long flags;
>> struct mtrr_var_range *vr;
>> + bool pge;
>>
>> vr = &mtrr_state.var_ranges[reg];
>>
>> local_irq_save(flags);
>> - prepare_set();
>> + pge = prepare_set();
>>
>> if (size == 0) {
>> /* The invalid bit is kept in the mask, so we simply clear the
>> @@ -499,7 +514,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned
>> long base,
>> mtrr_wrmsr(MSR_IA32_MTRR_PHYSMASK(reg), vr->mask);
>> }
>>
>> - post_set();
>> + post_set(pge);
>> local_irq_restore(flags);
>> }
>>
>> diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
>> index 38cedf3b22..f793b70696 100644
>> --- a/xen/arch/x86/flushtlb.c
>> +++ b/xen/arch/x86/flushtlb.c
>> @@ -10,6 +10,7 @@
>> #include <xen/sched.h>
>> #include <xen/softirq.h>
>> #include <asm/flushtlb.h>
>> +#include <asm/invpcid.h>
>> #include <asm/page.h>
>>
>> /* Debug builds: Wrap frequently to stress-test the wrap logic. */
>> @@ -71,6 +72,23 @@ static void post_flush(u32 t)
>> this_cpu(tlbflush_time) = t;
>> }
>>
>> +static void do_tlb_flush(void)
>> +{
>> + u32 t = pre_flush();
>> +
>> + if ( use_invpcid )
>> + invpcid_flush_all();
>> + else
>> + {
>> + unsigned long cr4 = read_cr4();
>> +
>> + write_cr4(cr4 ^ X86_CR4_PGE);
>> + write_cr4(cr4);
>> + }
>> +
>> + post_flush(t);
>> +}
>> +
>> void write_cr3(unsigned long cr3)
>> {
>> unsigned long flags, cr4;
>> @@ -118,16 +136,7 @@ unsigned int flush_area_local(const void *va, unsigned
>> int flags)
>> : : "m" (*(const char *)(va)) : "memory" );
>> }
>> else
>> - {
>> - u32 t = pre_flush();
>> - unsigned long cr4 = read_cr4();
>> -
>> - write_cr4(cr4 & ~X86_CR4_PGE);
>> - barrier();
>> - write_cr4(cr4);
>> -
>> - post_flush(t);
>> - }
>> + do_tlb_flush();
>> }
>>
>> if ( flags & FLUSH_CACHE )
>> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
>> index e83d3b4f07..0a27402e4d 100644
>> --- a/xen/arch/x86/setup.c
>> +++ b/xen/arch/x86/setup.c
>> @@ -63,6 +63,11 @@ boolean_param("nosmp", opt_nosmp);
>> static unsigned int __initdata max_cpus;
>> integer_param("maxcpus", max_cpus);
>>
>> +/* opt_invpcid: If false, don't use INVPCID instruction even if available.
>> */
>> +static bool __initdata opt_invpcid = true;
>> +boolean_param("invpcid", opt_invpcid);
>> +bool use_invpcid;
>
> __read_mostly.
Yes.
Juergen
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |