[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH] x86: consolidate/enhance TLB flushing interface
Pretty yukky and unlikely to improve performance on any current workload. The scary errata around INVLPG on 2MB/4MB pages makes me think we should just continue to avoid INVLPG for anything other than 4kB regions. Certainly unless there is a demonstrable measurable performance loss for taking this conservative approach. Anyway, you're basically turning off the cunning goodness for all current Intel CPUs. :-) -- Keir On 21/8/07 16:25, "Jan Beulich" <jbeulich@xxxxxxxxxx> wrote: > Folding into a single local handler and a single SMP multiplexor as > well as adding capability to also flush caches through the same > interfaces (a subsequent patch will make use of this). > > Once at changing cpuinfo_x86, this patch also removes several unused > fields apparently inherited from Linux. > > The changes to xen/include/asm-x86/cpufeature.h apply cleanly only with > the SVM/EFER patch (which was sent several days ago) applied. > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx> > > Index: 2007-08-08/xen/arch/x86/cpu/common.c > =================================================================== > --- 2007-08-08.orig/xen/arch/x86/cpu/common.c 2007-08-21 14:15:47.000000000 > +0200 > +++ 2007-08-08/xen/arch/x86/cpu/common.c 2007-08-08 12:03:19.000000000 +0200 > @@ -229,7 +229,6 @@ static void __init early_cpu_detect(void > void __devinit generic_identify(struct cpuinfo_x86 * c) > { > u32 tfms, xlvl; > - int junk; > > if (have_cpuid_p()) { > /* Get vendor name */ > @@ -244,8 +243,8 @@ void __devinit generic_identify(struct c > > /* Intel-defined flags: level 0x00000001 */ > if ( c->cpuid_level >= 0x00000001 ) { > - u32 capability, excap; > - cpuid(0x00000001, &tfms, &junk, &excap, &capability); > + u32 capability, excap, ebx; > + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); > c->x86_capability[0] = capability; > c->x86_capability[4] = excap; > c->x86 = (tfms >> 8) & 15; > @@ -255,6 +254,8 @@ void __devinit generic_identify(struct c > c->x86_model += ((tfms >> 16) & 0xF) << 
4; > } > c->x86_mask = tfms & 15; > + if ( cpu_has(c, X86_FEATURE_CLFLSH) ) > + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; > } else { > /* Have CPUID level 0 only - unheard of */ > c->x86 = 4; > @@ -272,6 +273,11 @@ void __devinit generic_identify(struct c > } > } > > +#ifdef CONFIG_X86_64 > + if ( cpu_has(c, X86_FEATURE_PAGE1GB) ) > + __set_bit(3, &c->invlpg_works_ok); > +#endif > + > early_intel_workaround(c); > > #ifdef CONFIG_X86_HT > @@ -313,6 +319,7 @@ void __devinit identify_cpu(struct cpuin > c->x86_vendor_id[0] = '\0'; /* Unset */ > c->x86_model_id[0] = '\0'; /* Unset */ > c->x86_max_cores = 1; > + c->x86_clflush_size = 0; > memset(&c->x86_capability, 0, sizeof c->x86_capability); > > if (!have_cpuid_p()) { > Index: 2007-08-08/xen/arch/x86/cpu/intel.c > =================================================================== > --- 2007-08-08.orig/xen/arch/x86/cpu/intel.c 2007-08-21 14:15:47.000000000 > +0200 > +++ 2007-08-08/xen/arch/x86/cpu/intel.c 2007-08-21 12:17:32.000000000 +0200 > @@ -16,8 +16,6 @@ > > #define select_idle_routine(x) ((void)0) > > -extern int trap_init_f00f_bug(void); > - > #ifdef CONFIG_X86_INTEL_USERCOPY > /* > * Alignment at which movsl is preferred for bulk memory copies. > @@ -97,25 +95,6 @@ static void __devinit init_intel(struct > unsigned int l2 = 0; > char *p = NULL; > > -#ifdef CONFIG_X86_F00F_BUG > - /* > - * All current models of Pentium and Pentium with MMX technology CPUs > - * have the F0 0F bug, which lets nonprivileged users lock up the system. > - * Note that the workaround only should be initialized once... 
> - */ > - c->f00f_bug = 0; > - if ( c->x86 == 5 ) { > - static int f00f_workaround_enabled = 0; > - > - c->f00f_bug = 1; > - if ( !f00f_workaround_enabled ) { > - trap_init_f00f_bug(); > - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround > enabled.\n"); > - f00f_workaround_enabled = 1; > - } > - } > -#endif > - > select_idle_routine(c); > l2 = init_intel_cacheinfo(c); > > @@ -123,6 +102,16 @@ static void __devinit init_intel(struct > if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) > clear_bit(X86_FEATURE_SEP, c->x86_capability); > > + if ( /* PentiumPro erratum 30 */ > + (c->x86 == 6 && c->x86_model == 1 && c->x86_mask < 9) || > + /* Dual-Core Intel Xeon 3000/5100 series erratum 89/90 */ > + /* Quad-Core Intel Xeon 3200/5300 series erratum 89/88 */ > + /* Intel Core2 erratum 89 */ > + (c->x86 == 6 && c->x86_model == 15 ) || > + /* Dual-Core Intel Xeon LV/ULV erratum 75 */ > + (c->x86 == 6 && c->x86_model == 14 ) ) > + __clear_bit(2, &c->invlpg_works_ok); > + > /* Names for the Pentium II/Celeron processors > detectable only by also checking the cache size. > Dixon is NOT a Celeron. */ > Index: 2007-08-08/xen/arch/x86/flushtlb.c > =================================================================== > --- 2007-08-08.orig/xen/arch/x86/flushtlb.c 2007-08-21 14:15:47.000000000 > +0200 > +++ 2007-08-08/xen/arch/x86/flushtlb.c 2007-08-21 14:00:19.000000000 +0200 > @@ -95,26 +95,74 @@ void write_cr3(unsigned long cr3) > local_irq_restore(flags); > } > > -void local_flush_tlb(void) > +void flush_one_local(const void *va, unsigned int flags) > { > - unsigned long flags; > - u32 t; > + const struct cpuinfo_x86 *c = ¤t_cpu_data; > + unsigned int level = flags & FLUSH_LEVEL_MASK; > + unsigned long irqfl; > > - /* This non-reentrant function is sometimes called in interrupt context. 
> */ > - local_irq_save(flags); > - > - t = pre_flush(); > + if ( !c->x86 ) > + c = &boot_cpu_data; > > - hvm_flush_guest_tlbs(); > + /* This non-reentrant function is sometimes called in interrupt context. > */ > + local_irq_save(irqfl); > > -#ifdef USER_MAPPINGS_ARE_GLOBAL > - __pge_off(); > - __pge_on(); > -#else > - __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" ); > + if ( flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL) ) > + { > + if ( level > 0 && > + level < CONFIG_PAGING_LEVELS && > + test_bit(level, &c->invlpg_works_ok) ) > + __asm__ __volatile__( "invlpg %0" > + : > + : "m" (*(const char *)(va)) > + : "memory" ); > + else > + { > + u32 t = pre_flush(); > + > + hvm_flush_guest_tlbs(); > + > +#ifndef USER_MAPPINGS_ARE_GLOBAL > + if ( !(flags & FLUSH_TLB_GLOBAL) || > + !(mmu_cr4_features & X86_CR4_PGE) ) > + __asm__ __volatile__ ( "mov %0, %%cr3" > + : > + : "r" (read_cr3()) > + : "memory" ); > + else > #endif > + { > + __pge_off(); > + __pge_on(); > + } > + > + post_flush(t); > + } > + } > + > + if ( flags & FLUSH_CACHE ) > + { > + unsigned long sz; > + > + if ( level > 0 && level < CONFIG_PAGING_LEVELS ) > + sz = 1UL << ((level - 1) * PAGETABLE_ORDER); > + else > + sz = ULONG_MAX; > + if ( c->x86_clflush_size > 0 && > + c->x86_cache_size > 0 && > + sz < (c->x86_cache_size >> (PAGE_SHIFT - 10)) ) > + { > + unsigned long i; > + > + va = (const void *)((unsigned long)va & ~(sz - 1)); > + for ( i = 0; i < sz; i += c->x86_clflush_size ) > + __asm__ __volatile__( "clflush %0" > + : > + : "m" (((const char *)va)[i]) ); > + } > + else > + wbinvd(); > + } > > - post_flush(t); > - > - local_irq_restore(flags); > + local_irq_restore(irqfl); > } > Index: 2007-08-08/xen/arch/x86/mm.c > =================================================================== > --- 2007-08-08.orig/xen/arch/x86/mm.c 2007-08-21 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/arch/x86/mm.c 2007-08-21 14:18:00.000000000 +0200 > @@ -3497,7 +3497,7 @@ int map_pages_to_xen( > > if ( 
(l2e_get_flags(ol2e) & _PAGE_PRESENT) ) > { > - local_flush_tlb_pge(); > + flush_one_local((void *)virt, FLUSH_TLB_GLOBAL|2); > if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) ) > free_xen_pagetable(mfn_to_virt(l2e_get_pfn(ol2e))); > } > @@ -3525,7 +3525,7 @@ int map_pages_to_xen( > l2e_get_flags(*pl2e) & > ~_PAGE_PSE)); > l2e_write_atomic(pl2e, l2e_from_pfn(virt_to_mfn(pl1e), > __PAGE_HYPERVISOR)); > - local_flush_tlb_pge(); > + flush_one_local((void *)virt, FLUSH_TLB_GLOBAL|2); > } > > pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt); > @@ -3608,7 +3608,7 @@ void destroy_xen_mappings(unsigned long > } > } > > - flush_tlb_all_pge(); > + flush_global(FLUSH_TLB_GLOBAL); > } > > void __set_fixmap( > Index: 2007-08-08/xen/arch/x86/setup.c > =================================================================== > --- 2007-08-08.orig/xen/arch/x86/setup.c 2007-08-21 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/arch/x86/setup.c 2007-08-21 11:06:23.000000000 +0200 > @@ -114,7 +114,7 @@ struct tss_struct init_tss[NR_CPUS]; > > char __attribute__ ((__section__(".bss.stack_aligned"))) > cpu0_stack[STACK_SIZE]; > > -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; > +struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, 0x6, 1, -1 }; > > #if CONFIG_PAGING_LEVELS > 2 > unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE; > Index: 2007-08-08/xen/arch/x86/smp.c > =================================================================== > --- 2007-08-08.orig/xen/arch/x86/smp.c 2007-08-21 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/arch/x86/smp.c 2007-08-21 12:20:28.000000000 +0200 > @@ -164,7 +164,8 @@ void send_IPI_mask_phys(cpumask_t mask, > > static DEFINE_SPINLOCK(flush_lock); > static cpumask_t flush_cpumask; > -static unsigned long flush_va; > +static const void *flush_va; > +static unsigned int flush_flags; > > fastcall void smp_invalidate_interrupt(void) > { > @@ -172,26 +173,18 @@ fastcall void smp_invalidate_interrupt(v > perfc_incr(ipis); > 
irq_enter(); > if ( !__sync_lazy_execstate() ) > - { > - if ( flush_va == FLUSHVA_ALL ) > - local_flush_tlb(); > - else > - local_flush_tlb_one(flush_va); > - } > + flush_one_local(flush_va, flush_flags); > cpu_clear(smp_processor_id(), flush_cpumask); > irq_exit(); > } > > -void __flush_tlb_mask(cpumask_t mask, unsigned long va) > +void flush_one_mask(cpumask_t mask, const void *va, unsigned int flags) > { > ASSERT(local_irq_is_enabled()); > > if ( cpu_isset(smp_processor_id(), mask) ) > { > - if ( va == FLUSHVA_ALL ) > - local_flush_tlb(); > - else > - local_flush_tlb_one(va); > + flush_one_local(va, flags); > cpu_clear(smp_processor_id(), mask); > } > > @@ -200,6 +193,7 @@ void __flush_tlb_mask(cpumask_t mask, un > spin_lock(&flush_lock); > flush_cpumask = mask; > flush_va = va; > + flush_flags = flags; > send_IPI_mask(mask, INVALIDATE_TLB_VECTOR); > while ( !cpus_empty(flush_cpumask) ) > cpu_relax(); > @@ -215,24 +209,13 @@ void new_tlbflush_clock_period(void) > /* Flush everyone else. We definitely flushed just before entry. */ > allbutself = cpu_online_map; > cpu_clear(smp_processor_id(), allbutself); > - __flush_tlb_mask(allbutself, FLUSHVA_ALL); > + flush_mask(allbutself, FLUSH_TLB); > > /* No need for atomicity: we are the only possible updater. 
*/ > ASSERT(tlbflush_clock == 0); > tlbflush_clock++; > } > > -static void flush_tlb_all_pge_ipi(void *info) > -{ > - local_flush_tlb_pge(); > -} > - > -void flush_tlb_all_pge(void) > -{ > - smp_call_function(flush_tlb_all_pge_ipi, 0, 1, 1); > - local_flush_tlb_pge(); > -} > - > void smp_send_event_check_mask(cpumask_t mask) > { > cpu_clear(smp_processor_id(), mask); > Index: 2007-08-08/xen/arch/x86/x86_32/mm.c > =================================================================== > --- 2007-08-08.orig/xen/arch/x86/x86_32/mm.c 2007-08-21 14:15:47.000000000 > +0200 > +++ 2007-08-08/xen/arch/x86/x86_32/mm.c 2007-08-21 09:59:15.000000000 +0200 > @@ -149,7 +149,7 @@ void __init zap_low_mappings(l2_pgentry_ > /* Now zap mappings in the idle pagetables. */ > destroy_xen_mappings(0, HYPERVISOR_VIRT_START); > > - flush_tlb_all_pge(); > + flush_global(FLUSH_TLB_GLOBAL); > > /* Replace with mapping of the boot trampoline only. */ > map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT, > Index: 2007-08-08/xen/arch/x86/x86_64/mm.c > =================================================================== > --- 2007-08-08.orig/xen/arch/x86/x86_64/mm.c 2007-08-21 14:15:47.000000000 > +0200 > +++ 2007-08-08/xen/arch/x86/x86_64/mm.c 2007-08-21 12:33:35.000000000 +0200 > @@ -205,7 +205,7 @@ void __init zap_low_mappings(void) > > /* Remove aliased mapping of first 1:1 PML4 entry. */ > l4e_write(&idle_pg_table[0], l4e_empty()); > - local_flush_tlb_pge(); > + flush_local(FLUSH_TLB_GLOBAL); > > /* Replace with mapping of the boot trampoline only. 
*/ > map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT, > Index: 2007-08-08/xen/include/asm-x86/cpufeature.h > =================================================================== > --- 2007-08-08.orig/xen/include/asm-x86/cpufeature.h 2007-08-21 > 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/include/asm-x86/cpufeature.h 2007-08-21 12:07:46.000000000 > +0200 > @@ -50,6 +50,7 @@ > #define X86_FEATURE_NX (1*32+20) /* Execute Disable */ > #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ > #define X86_FEATURE_FFXSR (1*32+25) /* FFXSR instruction optimizations */ > +#define X86_FEATURE_PAGE1GB (1*32+26) /* 1Gb large page support */ > #define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ > #define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ > #define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ > @@ -137,6 +138,7 @@ > #define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) > #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) > #define cpu_has_ffxsr 0 > +#define cpu_has_page1gb 0 > #else /* __x86_64__ */ > #define cpu_has_vme 0 > #define cpu_has_de 1 > @@ -161,6 +163,7 @@ > #define cpu_has_centaur_mcr 0 > #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) > #define cpu_has_ffxsr boot_cpu_has(X86_FEATURE_FFXSR) > +#define cpu_has_page1gb boot_cpu_has(X86_FEATURE_PAGE1GB) > #endif > > #endif /* __ASM_I386_CPUFEATURE_H */ > Index: 2007-08-08/xen/include/asm-x86/flushtlb.h > =================================================================== > --- 2007-08-08.orig/xen/include/asm-x86/flushtlb.h 2007-08-21 > 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/include/asm-x86/flushtlb.h 2007-08-21 12:34:36.000000000 > +0200 > @@ -15,6 +15,29 @@ > #include <xen/smp.h> > #include <xen/types.h> > > +#define FLUSH_LEVEL_MASK 0x0f > +#define FLUSH_TLB 0x10 > +#define FLUSH_TLB_GLOBAL 0x20 > +#define FLUSH_CACHE 0x40 > + > +void flush_one_local(const void *va, unsigned int flags); > +static inline void flush_local(unsigned int 
flags) > +{ > + flush_one_local(NULL, flags | FLUSH_LEVEL_MASK); > +} > +#ifdef CONFIG_SMP > +void flush_one_mask(cpumask_t, const void *va, unsigned int flags); > +static inline void flush_mask(cpumask_t mask, unsigned int flags) > +{ > + flush_one_mask(mask, NULL, flags | FLUSH_LEVEL_MASK); > +} > +#else > +#define flush_mask(mask, flags) flush_local(flags) > +#define flush_one_mask(mask, va, flags) flush_one_local(va, flags) > +#endif > +#define flush_global(flags) flush_mask(cpu_online_map, flags) > +#define flush_one_global(va, flags) flush_one_mask(cpu_online_map, va, flags) > + > /* The current time as shown by the virtual TLB clock. */ > extern u32 tlbflush_clock; > > @@ -72,31 +95,20 @@ static inline unsigned long read_cr3(voi > extern void write_cr3(unsigned long cr3); > > /* Flush guest mappings from the TLB and implicitly tick the tlbflush clock. > */ > -extern void local_flush_tlb(void); > - > -#define local_flush_tlb_pge() \ > - do { \ > - __pge_off(); \ > - local_flush_tlb(); \ > - __pge_on(); \ > - } while ( 0 ) > +#define local_flush_tlb(void) flush_local(FLUSH_TLB) > > -#define local_flush_tlb_one(__addr) \ > - __asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr))) > +#define local_flush_tlb_one(v) \ > + flush_one_local((const void *)(v), FLUSH_TLB|1) > > #define flush_tlb_all() flush_tlb_mask(cpu_online_map) > > #ifndef CONFIG_SMP > -#define flush_tlb_all_pge() local_flush_tlb_pge() > #define flush_tlb_mask(mask) local_flush_tlb() > -#define flush_tlb_one_mask(mask,v) local_flush_tlb_one(_v) > +#define flush_tlb_one_mask(mask,v) local_flush_tlb_one(v) > #else > #include <xen/smp.h> > -#define FLUSHVA_ALL (~0UL) > -extern void flush_tlb_all_pge(void); > -extern void __flush_tlb_mask(cpumask_t mask, unsigned long va); > -#define flush_tlb_mask(mask) __flush_tlb_mask(mask,FLUSHVA_ALL) > -#define flush_tlb_one_mask(mask,v) __flush_tlb_mask(mask,(unsigned long)(v)) > +#define flush_tlb_mask(mask) flush_mask(mask, FLUSH_TLB) > +#define 
flush_tlb_one_mask(mask,v) flush_one_mask(mask,(const void *)(v), > FLUSH_TLB|1) > #endif > > #endif /* __FLUSHTLB_H__ */ > Index: 2007-08-08/xen/include/asm-x86/processor.h > =================================================================== > --- 2007-08-08.orig/xen/include/asm-x86/processor.h 2007-08-21 > 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/include/asm-x86/processor.h 2007-08-21 11:34:25.000000000 > +0200 > @@ -156,24 +156,20 @@ struct cpuinfo_x86 { > __u8 x86_vendor; /* CPU vendor */ > __u8 x86_model; > __u8 x86_mask; > - char wp_works_ok; /* It doesn't on 386's */ > - char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ > + __u8 invlpg_works_ok; > char hard_math; > - char rfu; > int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ > unsigned int x86_capability[NCAPINTS]; > char x86_vendor_id[16]; > char x86_model_id[64]; > int x86_cache_size; /* in KB - valid for CPUS which support this call > */ > int x86_cache_alignment; /* In bytes */ > - char fdiv_bug; > - char f00f_bug; > char coma_bug; > - char pad0; > int x86_power; > unsigned char x86_max_cores; /* cpuid returned max cores value */ > unsigned char booted_cores; /* number of cores as seen by OS */ > unsigned char apicid; > + unsigned short x86_clflush_size; > } __cacheline_aligned; > > /* > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxxxxxxxx > http://lists.xensource.com/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
[RackSpace logo] |
Lists.xenproject.org is hosted with RackSpace, monitoring our servers 24x7x365 and backed by RackSpace's Fanatical Support®. |