[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH, v2] linux-2.6.18/x86: improve CR0 read/write handling


  • To: "xen-devel" <xen-devel@xxxxxxxxxxxxx>
  • From: "Jan Beulich" <JBeulich@xxxxxxxx>
  • Date: Mon, 02 Jul 2012 11:11:58 +0100
  • Delivery-date: Mon, 02 Jul 2012 10:12:29 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>

With the only bit in CR0 permitted to be changed by PV guests being TS,
optimize the handling towards that: Keep a cached value in a per-CPU
variable, and issue HYPERVISOR_fpu_taskswitch hypercalls for updates,
except in the unusual case where something in the system still tries to
modify another bit (any such attempt would then be logged by the
hypervisor).

This removes the need to have the hypervisor emulate MOV to/from CR0
instructions in any even moderately frequently executed code path.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

---
v2: Add safety measure for nested updates (those will now always access
    non-cached state).

--- a/arch/i386/kernel/cpu/common-xen.c
+++ b/arch/i386/kernel/cpu/common-xen.c
@@ -32,6 +32,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
 #ifndef CONFIG_XEN
 DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
 EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+#else
+DEFINE_PER_CPU(unsigned int, xen_x86_cr0);
+DEFINE_PER_CPU(unsigned int, xen_x86_cr0_upd) = ~0;
+EXPORT_PER_CPU_SYMBOL(xen_x86_cr0);
+EXPORT_PER_CPU_SYMBOL(xen_x86_cr0_upd);
 #endif
 
 static int cachesize_override __cpuinitdata = -1;
@@ -681,6 +686,8 @@ old_gdt:
        cpu_gdt_descr->size = GDT_SIZE - 1;
        cpu_gdt_descr->address = (unsigned long)gdt;
 #else
+       __get_cpu_var(xen_x86_cr0) = raw_read_cr0();
+       xen_clear_cr0_upd();
        if (cpu == 0 && cpu_gdt_descr->address == 0) {
                gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
                /* alloc_bootmem_pages panics on failure, so no check */
--- a/arch/i386/kernel/process-xen.c
+++ b/arch/i386/kernel/process-xen.c
@@ -639,8 +639,14 @@ struct task_struct fastcall * __switch_t
        BUG_ON(pdo > _pdo + ARRAY_SIZE(_pdo));
 #endif
        BUG_ON(mcl > _mcl + ARRAY_SIZE(_mcl));
+       if (_mcl->op == __HYPERVISOR_fpu_taskswitch)
+               __get_cpu_var(xen_x86_cr0_upd) = X86_CR0_TS;
        if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
                BUG();
+       if (_mcl->op == __HYPERVISOR_fpu_taskswitch) {
+               __get_cpu_var(xen_x86_cr0) |= X86_CR0_TS;
+               xen_clear_cr0_upd();
+       }
 
        /*
         * Restore %fs and %gs if needed.
--- a/arch/i386/kernel/traps-xen.c
+++ b/arch/i386/kernel/traps-xen.c
@@ -1057,6 +1057,7 @@ asmlinkage void math_state_restore(struc
        struct task_struct *tsk = thread->task;
 
        /* NB. 'clts' is done for us by Xen during virtual trap. */
+       __get_cpu_var(xen_x86_cr0) &= ~X86_CR0_TS;
        if (!tsk_used_math(tsk))
                init_fpu(tsk);
        restore_fpu(tsk);
--- a/arch/x86_64/kernel/process-xen.c
+++ b/arch/x86_64/kernel/process-xen.c
@@ -572,8 +572,14 @@ __switch_to(struct task_struct *prev_p, 
        BUG_ON(pdo > _pdo + ARRAY_SIZE(_pdo));
 #endif
        BUG_ON(mcl > _mcl + ARRAY_SIZE(_mcl));
+       if (_mcl->op == __HYPERVISOR_fpu_taskswitch)
+               __get_cpu_var(xen_x86_cr0_upd) = X86_CR0_TS;
        if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
                BUG();
+       if (_mcl->op == __HYPERVISOR_fpu_taskswitch) {
+               __get_cpu_var(xen_x86_cr0) |= X86_CR0_TS;
+               xen_clear_cr0_upd();
+       }
 
        /* 
         * Switch DS and ES.
--- a/arch/x86_64/kernel/setup64-xen.c
+++ b/arch/x86_64/kernel/setup64-xen.c
@@ -126,6 +126,11 @@ void __init setup_per_cpu_areas(void)
 } 
 
 #ifdef CONFIG_XEN
+DEFINE_PER_CPU(unsigned long, xen_x86_cr0);
+DEFINE_PER_CPU(unsigned long, xen_x86_cr0_upd) = ~0;
+EXPORT_PER_CPU_SYMBOL(xen_x86_cr0);
+EXPORT_PER_CPU_SYMBOL(xen_x86_cr0_upd);
+
 static void switch_pt(void)
 {
        xen_pt_switch(__pa_symbol(init_level4_pgt));
@@ -174,6 +179,8 @@ void pda_init(int cpu)
        if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
                                        (unsigned long)pda))
                BUG();
+       __get_cpu_var(xen_x86_cr0) = raw_read_cr0();
+       xen_clear_cr0_upd();
 #endif
        pda->cpunumber = cpu; 
        pda->irqcount = -1;
--- a/arch/x86_64/kernel/traps-xen.c
+++ b/arch/x86_64/kernel/traps-xen.c
@@ -1075,8 +1075,9 @@ asmlinkage void __attribute__((weak)) mc
 asmlinkage void math_state_restore(void)
 {
        struct task_struct *me = current;
-        /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
 
+       /* NB. 'clts' is done for us by Xen during virtual trap. */
+       __get_cpu_var(xen_x86_cr0) &= ~X86_CR0_TS;
        if (!used_math())
                init_fpu(me);
        restore_fpu_checking(&me->thread.i387.fxsave);
--- a/include/asm-i386/mach-xen/asm/system.h
+++ b/include/asm-i386/mach-xen/asm/system.h
@@ -2,8 +2,10 @@
 #define __ASM_SYSTEM_H
 
 #include <linux/kernel.h>
+#include <linux/threads.h>
 #include <asm/segment.h>
 #include <asm/cpufeature.h>
+#include <asm/percpu.h>
 #include <linux/bitops.h> /* for LOCK_PREFIX */
 #include <asm/synch_bitops.h>
 #include <asm/hypervisor.h>
@@ -90,15 +91,50 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
 #define savesegment(seg, value) \
        asm volatile("mov %%" #seg ",%0":"=rm" (value))
 
-#define read_cr0() ({ \
+DECLARE_PER_CPU(unsigned int, xen_x86_cr0);
+DECLARE_PER_CPU(unsigned int, xen_x86_cr0_upd);
+
+#define xen_read_cr0_upd() ({ \
+       unsigned int u__ = __get_cpu_var(xen_x86_cr0_upd); \
+       rmb(); \
+       u__; \
+})
+#define xen_clear_cr0_upd() do { \
+       wmb(); \
+       __get_cpu_var(xen_x86_cr0_upd) = 0; \
+} while (0)
+
+#define raw_read_cr0() ({ \
        unsigned int __dummy; \
        __asm__ __volatile__( \
                "movl %%cr0,%0\n\t" \
                :"=r" (__dummy)); \
        __dummy; \
 })
-#define write_cr0(x) \
+#define read_cr0() (likely(!xen_read_cr0_upd()) ? \
+                   __get_cpu_var(xen_x86_cr0) : raw_read_cr0())
+#define raw_write_cr0(x) \
        __asm__ __volatile__("movl %0,%%cr0": :"r" (x))
+#define write_cr0(x) do { \
+       unsigned int x__ = (x); \
+       unsigned int upd = x__ ^ __get_cpu_var(xen_x86_cr0); \
+       if (unlikely(cmpxchg(&__get_cpu_var(xen_x86_cr0_upd), 0, upd))) { \
+               raw_write_cr0(x__); \
+               break; \
+       } \
+       switch (upd) { \
+       case 0: \
+               continue; \
+       case X86_CR0_TS: \
+               HYPERVISOR_fpu_taskswitch(!!(x__ & X86_CR0_TS)); \
+               break; \
+       default: \
+               raw_write_cr0(x__); \
+               break; \
+       } \
+       __get_cpu_var(xen_x86_cr0) = x__; \
+       xen_clear_cr0_upd(); \
+} while (0)
 
 #define read_cr2() (current_vcpu_info()->arch.cr2)
 #define write_cr2(x) \
@@ -142,8 +178,27 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
 /*
  * Clear and set 'TS' bit respectively
  */
-#define clts() (HYPERVISOR_fpu_taskswitch(0))
-#define stts() (HYPERVISOR_fpu_taskswitch(1))
+#define X86_CR0_TS 8
+#define clts() ({ \
+       if (unlikely(xen_read_cr0_upd())) \
+               HYPERVISOR_fpu_taskswitch(0); \
+       else if (__get_cpu_var(xen_x86_cr0) & X86_CR0_TS) { \
+               __get_cpu_var(xen_x86_cr0_upd) = X86_CR0_TS; \
+               HYPERVISOR_fpu_taskswitch(0); \
+               __get_cpu_var(xen_x86_cr0) &= ~X86_CR0_TS; \
+               xen_clear_cr0_upd(); \
+       } \
+})
+#define stts() ({ \
+       if (unlikely(xen_read_cr0_upd())) \
+               HYPERVISOR_fpu_taskswitch(1); \
+       else if (!(__get_cpu_var(xen_x86_cr0) & X86_CR0_TS)) { \
+               __get_cpu_var(xen_x86_cr0_upd) = X86_CR0_TS; \
+               HYPERVISOR_fpu_taskswitch(1); \
+               __get_cpu_var(xen_x86_cr0) |= X86_CR0_TS; \
+               xen_clear_cr0_upd(); \
+       } \
+})
 
 #endif /* __KERNEL__ */
 
--- a/include/asm-x86_64/mach-xen/asm/system.h
+++ b/include/asm-x86_64/mach-xen/asm/system.h
@@ -7,7 +7,7 @@
 
 #include <asm/synch_bitops.h>
 #include <asm/hypervisor.h>
-#include <xen/interface/arch-x86_64.h>
+#include <asm/percpu.h>
 
 #ifdef __KERNEL__
 
@@ -71,19 +71,64 @@ struct alt_instr {
 /*
  * Clear and set 'TS' bit respectively
  */
-#define clts() (HYPERVISOR_fpu_taskswitch(0))
+#define X86_CR0_TS 8
+#define clts() ({ \
+       if (unlikely(xen_read_cr0_upd())) \
+               HYPERVISOR_fpu_taskswitch(0); \
+       else if (__get_cpu_var(xen_x86_cr0) & X86_CR0_TS) { \
+               __get_cpu_var(xen_x86_cr0_upd) = X86_CR0_TS; \
+               HYPERVISOR_fpu_taskswitch(0); \
+               __get_cpu_var(xen_x86_cr0) &= ~X86_CR0_TS; \
+               xen_clear_cr0_upd(); \
+       } \
+})
 
-static inline unsigned long read_cr0(void)
+DECLARE_PER_CPU(unsigned long, xen_x86_cr0);
+DECLARE_PER_CPU(unsigned long, xen_x86_cr0_upd);
+
+#define xen_read_cr0_upd() ({ \
+       unsigned long u__ = __get_cpu_var(xen_x86_cr0_upd); \
+       rmb(); \
+       u__; \
+})
+#define xen_clear_cr0_upd() do { \
+       wmb(); \
+       __get_cpu_var(xen_x86_cr0_upd) = 0; \
+} while (0)
+
+static inline unsigned long raw_read_cr0(void)
 { 
        unsigned long cr0;
        asm volatile("movq %%cr0,%0" : "=r" (cr0));
        return cr0;
 } 
+#define read_cr0() (likely(!xen_read_cr0_upd()) ? \
+                   __get_cpu_var(xen_x86_cr0) : raw_read_cr0())
 
-static inline void write_cr0(unsigned long val) 
+static inline void raw_write_cr0(unsigned long val)
 { 
        asm volatile("movq %0,%%cr0" :: "r" (val));
 } 
+#define write_cr0(x) do { \
+       unsigned long x__ = (x); \
+       unsigned long upd = x__ ^ __get_cpu_var(xen_x86_cr0); \
+       if (unlikely(cmpxchg(&__get_cpu_var(xen_x86_cr0_upd), 0, upd))) { \
+               raw_write_cr0(x__); \
+               break; \
+       } \
+       switch (upd) { \
+       case 0: \
+               continue; \
+       case X86_CR0_TS: \
+               HYPERVISOR_fpu_taskswitch(!!(x__ & X86_CR0_TS)); \
+               break; \
+       default: \
+               raw_write_cr0(x__); \
+               break; \
+       } \
+       __get_cpu_var(xen_x86_cr0) = x__; \
+       xen_clear_cr0_upd(); \
+} while (0)
 
 #define read_cr3() ({ \
        unsigned long __dummy; \
@@ -103,7 +148,16 @@ static inline void write_cr4(unsigned lo
        asm volatile("movq %0,%%cr4" :: "r" (val));
 } 
 
-#define stts() (HYPERVISOR_fpu_taskswitch(1))
+#define stts() ({ \
+       if (unlikely(xen_read_cr0_upd())) \
+               HYPERVISOR_fpu_taskswitch(1); \
+       else if (!(__get_cpu_var(xen_x86_cr0) & X86_CR0_TS)) { \
+               __get_cpu_var(xen_x86_cr0_upd) = X86_CR0_TS; \
+               HYPERVISOR_fpu_taskswitch(1); \
+               __get_cpu_var(xen_x86_cr0) |= X86_CR0_TS; \
+               xen_clear_cr0_upd(); \
+       } \
+})
 
 #define wbinvd() \
        __asm__ __volatile__ ("wbinvd": : :"memory");


Attachment: xen-x86-cr0.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.