[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Minios-devel] [UNIKRAFT PATCH v2 4/9] plat: check for and enable extended CPU features



Florian Schmidt <florian.schmidt@xxxxxxxxx> writes:

> But only do this if code is compiled with SSE/AVX.
>
> Signed-off-by: Florian Schmidt <florian.schmidt@xxxxxxxxx>
> ---
>  plat/common/include/x86/cpu_defs.h | 22 ++++++++++
>  plat/kvm/x86/entry64.S             | 58 +++++++++++++++++++++----
>  plat/kvm/x86/setup.c               | 15 -------
>  plat/xen/x86/entry64.S             | 68 +++++++++++++++++++++++++++---
>  plat/xen/x86/setup.c               | 15 -------
>  5 files changed, 135 insertions(+), 43 deletions(-)
>
> diff --git a/plat/common/include/x86/cpu_defs.h 
> b/plat/common/include/x86/cpu_defs.h
> index 9ecec967..78821b52 100644
> --- a/plat/common/include/x86/cpu_defs.h
> +++ b/plat/common/include/x86/cpu_defs.h
> @@ -58,6 +58,7 @@
>   */
>  #define X86_CR0_MP              (1 << 1)    /* Monitor Coprocessor */
>  #define X86_CR0_EM              (1 << 2)    /* Emulation */
> +#define X86_CR0_TS              (1 << 2)    /* Task Switched */
Did you mean 1 << 3?

>  #define X86_CR0_NE              (1 << 5)    /* Numeric Exception */
>  #define X86_CR0_PG              (1 << 31)   /* Paging */
>  
> @@ -67,10 +68,31 @@
>  #define X86_CR4_PAE             (1 << 5)    /* enable PAE */
>  #define X86_CR4_OSFXSR          (1 << 9)    /* OS support for FXSAVE/FXRSTOR 
> */
>  #define X86_CR4_OSXMMEXCPT      (1 << 10)   /* OS support for FP exceptions 
> */
> +#define X86_CR4_FSGSBASE        (1 << 16)   /* enable FSGSBASE*/
> +#define X86_CR4_OSXSAVE         (1 << 18)   /* enable XSAVE, extended states 
> */
>  
>  /*
>   * Intel CPU features in EFER
>   */
>  #define X86_EFER_LME            (1 << 8)    /* Long mode enable (R/W) */
>  
> +/* CPUID feature bits in ECX and EDX when EAX=1 */
> +#define X86_CPUID1_ECX_XSAVE    (1 << 26)
> +#define X86_CPUID1_ECX_OSXSAVE  (1 << 27)
> +#define X86_CPUID1_ECX_AVX      (1 << 28)
> +#define X86_CPUID1_EDX_FPU      (1 << 0)
> +#define X86_CPUID1_EDX_FXSR     (1 << 24)
> +#define X86_CPUID1_EDX_SSE      (1 << 25)
> +/* CPUID feature bits in EBX and ECX when EAX=7 */
ECX is in charge of the sub-leaf, maybe add this in comment as you did
for the next one?

> +#define X86_CPUID7_EBX_FSGSBASE (1 << 0)
> +/* CPUID feature bits when EAX=0xd, EXC=1 */
Typo EXC instead of ECX

> +#define X86_CPUIDD1_EAX_XSAVEOPT (1<<0)
> +
> +/*
> + * Extended Control Register 0 (XCR0)
> + */
> +#define X86_XCR0_X87            (1 << 0)
> +#define X86_XCR0_XMM            (1 << 1)
> +#define X86_XCR0_YMM            (1 << 2)
As far as I understand, the last two are enabling SSE and AVX. Maybe we
name them respectively, instead of XMM and YMM?

> +
>  #endif /* __PLAT_CMN_X86_CPU_DEFS_H__ */
> diff --git a/plat/kvm/x86/entry64.S b/plat/kvm/x86/entry64.S
> index dc3614a2..3fde22ea 100644
> --- a/plat/kvm/x86/entry64.S
> +++ b/plat/kvm/x86/entry64.S
> @@ -172,15 +172,57 @@ ENTRY(_libkvmplat_start64)
>       movq $bootstack, %rsp
>       xorq %rbp, %rbp
>  
> -     /* enable FPU and SSE units */
> -     movq %cr0, %rax
> -     andq $(~X86_CR0_EM), %rax
> -     orq $(X86_CR0_MP | X86_CR0_NE), %rax
> -     movq %rax, %cr0
> -     movq %cr4, %rax
> -     orq $(X86_CR4_OSXMMEXCPT | X86_CR4_OSFXSR), %rax
> -     movq %rax, %cr4
> +     /* We will work on cr0 and cr4 multiple times.
> +      * We put cr0 into rsi and cr4 into rdi, because cpuid and
> +      * xgetbv/xsetbv work on eax/ebx/ecx/edx. */
> +     movq %cr0, %rsi
> +     movq %cr4, %rdi
> +     /* FPU and SSE are part of base x86-64, so no need to check for their
> +      * availability before enabling and initializing. */
> +     andl $(~(X86_CR0_EM | X86_CR0_TS)), %esi
> +     orl $(X86_CR0_MP | X86_CR0_NE), %esi
> +     movq %rsi, %cr0
> +     fninit
So the floating point is enabled always? Should it be rather be a config
option? Not insisting, just asking.

> +#if __SSE__
> +     orl $(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT), %edi
> +     movq %rdi, %cr4
>       ldmxcsr (mxcsr_ptr)
> +#endif /* __SSE__ */
> +     /* Check capabilities subject to availability as indicated by cpuid.
> +      * First, start off with "standard features" */
> +     movl $0x1, %eax
> +     cpuid
> +#if __AVX__
> +     /* ecx and edx now contain capability information, so we can now
> +      * enable capabilities based on the indicated features */
> +     /* OSXSAVE needs to be enabled before AVX */
> +     testl $(X86_CPUID1_ECX_XSAVE), %ecx
> +     jz noxsave
> +     orl $(X86_CR4_OSXSAVE), %edi
> +     movq %rdi, %cr4
> +     /* now enable AVX. This needs to be last checking cpuid features from
> +      * the eax=1 cpuid call, because it clobbers ecx */
> +     testl $(X86_CPUID1_ECX_AVX), %ecx
> +     jz noavx
> +     xorl %ecx, %ecx
> +     xgetbv
> +     orl $(X86_XCR0_XMM | X86_XCR0_YMM), %eax
According to my very shallow understanding, XMM is SSE relating
thing. Should its enabling be moved to the appropriate section above?

Anyways, it probably a good idea to add a compilation error in case
__AVX__ is defined, but __SSE__ is not.

> +     xsetbv
> +noavx:
> +noxsave:
> +#endif /* __AVX__ */
> +     /* Now, check for extended features. */
> +     movl $0x7, %eax
> +     movl $0x1, %ecx
> +     cpuid
> +     /* ebx, ecx, edx now contain extended capabilties information. */
> +     /* check for and enable FS/GSBASE */
> +     testl $(X86_CPUID7_EBX_FSGSBASE), %ebx
> +     jz nofsgsbase
> +     orl $(X86_CR4_FSGSBASE), %edi
> +     movq %rdi, %cr4
> +nofsgsbase:
> +     /* done setting up CPU capabilities */
>  
>       /* read multiboot info pointer */
>       movq -8(%rsp), %rdi
> diff --git a/plat/kvm/x86/setup.c b/plat/kvm/x86/setup.c
> index e02886d1..47a78dcf 100644
> --- a/plat/kvm/x86/setup.c
> +++ b/plat/kvm/x86/setup.c
> @@ -109,20 +109,6 @@ static inline void _mb_init_mem(struct multiboot_info 
> *mi)
>       _libkvmplat_stack_top  = (void *) (max_addr - __STACK_SIZE);
>  }
>  
> -static inline void _init_cpufeatures(void)
> -{
> -#if __SSE__
> -     unsigned long sse_status = 0x1f80;
> -#endif
> -
> -     /* FPU */
> -     asm volatile("fninit");
> -
> -#if __SSE__
> -     asm volatile("ldmxcsr %0" : : "m"(sse_status));
> -#endif
> -}
> -
>  static void _libkvmplat_entry2(void *arg __attribute__((unused)))
>  {
>       ukplat_entry_argp(NULL, cmdline, sizeof(cmdline));
> @@ -133,7 +119,6 @@ void _libkvmplat_entry(void *arg)
>       struct multiboot_info *mi = (struct multiboot_info *)arg;
>  
>       _libkvmplat_init_console();
> -     _init_cpufeatures();
>       traps_init();
>       intctrl_init();
>  
> diff --git a/plat/xen/x86/entry64.S b/plat/xen/x86/entry64.S
> index c266804a..4363ac0e 100644
> --- a/plat/xen/x86/entry64.S
> +++ b/plat/xen/x86/entry64.S
> @@ -25,6 +25,7 @@
>  
>  #include <uk/arch/types.h>
>  #include <uk/arch/limits.h>
> +#include <x86/cpu_defs.h>
>  #include <x86/traps.h>
>  #include <uk/config.h>
>  #include <xen/xen.h>
> @@ -60,11 +61,68 @@ _libxenplat_start:
>  #include "entry_hvm.S"
>  
>  #endif
> -        cld
> -        movq stack_start(%rip),%rsp
> -        andq $(~(__STACK_SIZE-1)), %rsp
> -        movq %rsi,%rdi
> -        call _libxenplat_x86entry
> +     cld
> +     movq stack_start(%rip),%rsp
> +     andq $(~(__STACK_SIZE-1)), %rsp
> +     movq %rsi, %r8 /* esi contains pointer to start_info page */
> +     /* We will work on cr0 and cr4 multiple times.
> +      * We put cr0 into rsi and cr4 into rdi, because cpuid and
> +      * xgetbv/xsetbv work on eax/ebx/ecx/edx. */
> +     movq %cr0, %rsi
> +     movq %cr4, %rdi
> +     /* FPU and SSE are part of base x86-64, so no need to check for their
> +      * availability before enabling and initializing. */
> +     andl $(~(X86_CR0_EM | X86_CR0_TS)), %esi
> +     orl $(X86_CR0_MP | X86_CR0_NE), %esi
> +     movq %rsi, %cr0
> +     fninit
> +#if __SSE__
> +     orl $(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT), %edi
> +     movq %rdi, %cr4
> +     ldmxcsr (mxcsr_ptr)
> +#endif /* __SSE__ */
> +     /* Check capabilities subject to availability as indicated by cpuid.
> +      * First, start off with "standard features" */
> +     movl $0x1, %eax
> +     cpuid
> +#if __AVX__
> +     /* ecx and edx now contain capability information, so we can now
> +      * enable capabilities based on the indicated features */
> +     /* OSXSAVE needs to be enabled before AVX */
> +     testl $(X86_CPUID1_ECX_XSAVE), %ecx
> +     jz noxsave
> +     orl $(X86_CR4_OSXSAVE), %edi
> +     movq %rdi, %cr4
> +     /* now enable AVX. This needs to be last checking cpuid features from
> +      * the eax=1 cpuid call, because it clobbers ecx */
> +     testl $(X86_CPUID1_ECX_AVX), %ecx
> +     jz noavx
> +     xorl %ecx, %ecx
> +     xgetbv
> +     orl $(X86_XCR0_XMM | X86_XCR0_YMM), %eax
> +     xsetbv
> +noavx:
> +noxsave:
> +#endif /* __AVX__ */
> +     /* Now, check for extended features. */
> +     movl $0x7, %eax
> +     movl $0x1, %ecx
> +     cpuid
> +     /* ebx, ecx, edx now contain extended capabilties information. */
> +     /* check for and enable FS/GSBASE */
> +     testl $(X86_CPUID7_EBX_FSGSBASE), %ebx
> +     jz nofsgsbase
> +     orl $(X86_CR4_FSGSBASE), %edi
> +     movq %rdi, %cr4
> +nofsgsbase:
> +     /* Done setting up CPU capabilities, hand over to C entry point. */
> +     movq %r8, %rdi /* pass pointer to start_info page to C entry */
> +     call _libxenplat_x86entry
> +
> +.type mxcsr_ptr, @object
> +mxcsr_ptr:
> +     .long 0x1f80                    /* Intel SDM power-on default */
> +
>  
>  stack_start:
>          .quad _libxenplat_bootstack + (2*__STACK_SIZE)
> diff --git a/plat/xen/x86/setup.c b/plat/xen/x86/setup.c
> index 35fdd35e..a41d5cb3 100644
> --- a/plat/xen/x86/setup.c
> +++ b/plat/xen/x86/setup.c
> @@ -113,20 +113,6 @@ static inline void _init_traps(void)
>       traps_init();
>  }
>  
> -static inline void _init_cpufeatures(void)
> -{
> -#if __SSE__
> -     unsigned long sse_status = 0x1f80;
> -#endif
> -
> -     /* FPU */
> -     asm volatile("fninit");
> -
> -#if __SSE__
> -     asm volatile("ldmxcsr %0" : : "m"(sse_status));
> -#endif
> -}
> -
>  static inline void _init_shared_info(void)
>  {
>       int ret;
> @@ -184,7 +170,6 @@ void _libxenplat_x86entry(void *start_info) __noreturn;
>  void _libxenplat_x86entry(void *start_info)
>  {
>       _init_traps();
> -     _init_cpufeatures();
>       HYPERVISOR_start_info = (start_info_t *)start_info;
>       _libxenplat_prepare_console(); /* enables buffering for console */
>  
> -- 
> 2.19.2
>

-- 
Yuri Volchkov
Software Specialist

NEC Europe Ltd
Kurfürsten-Anlage 36
D-69115 Heidelberg

_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.