[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Minios-devel] [UNIKRAFT PATCH v2 4/9] plat: check for and enable extended CPU features
Florian Schmidt <florian.schmidt@xxxxxxxxx> writes: > But only do this if code is compiled with SSE/AVX. > > Signed-off-by: Florian Schmidt <florian.schmidt@xxxxxxxxx> > --- > plat/common/include/x86/cpu_defs.h | 22 ++++++++++ > plat/kvm/x86/entry64.S | 58 +++++++++++++++++++++---- > plat/kvm/x86/setup.c | 15 ------- > plat/xen/x86/entry64.S | 68 +++++++++++++++++++++++++++--- > plat/xen/x86/setup.c | 15 ------- > 5 files changed, 135 insertions(+), 43 deletions(-) > > diff --git a/plat/common/include/x86/cpu_defs.h > b/plat/common/include/x86/cpu_defs.h > index 9ecec967..78821b52 100644 > --- a/plat/common/include/x86/cpu_defs.h > +++ b/plat/common/include/x86/cpu_defs.h > @@ -58,6 +58,7 @@ > */ > #define X86_CR0_MP (1 << 1) /* Monitor Coprocessor */ > #define X86_CR0_EM (1 << 2) /* Emulation */ > +#define X86_CR0_TS (1 << 2) /* Task Switched */ Did you mean 1 << 3? > #define X86_CR0_NE (1 << 5) /* Numeric Exception */ > #define X86_CR0_PG (1 << 31) /* Paging */ > > @@ -67,10 +68,31 @@ > #define X86_CR4_PAE (1 << 5) /* enable PAE */ > #define X86_CR4_OSFXSR (1 << 9) /* OS support for FXSAVE/FXRSTOR > */ > #define X86_CR4_OSXMMEXCPT (1 << 10) /* OS support for FP exceptions > */ > +#define X86_CR4_FSGSBASE (1 << 16) /* enable FSGSBASE*/ > +#define X86_CR4_OSXSAVE (1 << 18) /* enable XSAVE, extended states > */ > > /* > * Intel CPU features in EFER > */ > #define X86_EFER_LME (1 << 8) /* Long mode enable (R/W) */ > > +/* CPUID feature bits in ECX and EDX when EAX=1 */ > +#define X86_CPUID1_ECX_XSAVE (1 << 26) > +#define X86_CPUID1_ECX_OSXSAVE (1 << 27) > +#define X86_CPUID1_ECX_AVX (1 << 28) > +#define X86_CPUID1_EDX_FPU (1 << 0) > +#define X86_CPUID1_EDX_FXSR (1 << 24) > +#define X86_CPUID1_EDX_SSE (1 << 25) > +/* CPUID feature bits in EBX and ECX when EAX=7 */ ECX is in charge of the sub-leaf, maybe add this in comment as you did for the next one? > +#define X86_CPUID7_EBX_FSGSBASE (1 << 0) > +/* CPUID feature bits when EAX=0xd, EXC=1 */ Typo EXC instead of ECX > +#define X86_CPUIDD1_EAX_XSAVEOPT (1<<0) > + > +/* > + * Extended Control Register 0 (XCR0) > + */ > +#define X86_XCR0_X87 (1 << 0) > +#define X86_XCR0_XMM (1 << 1) > +#define X86_XCR0_YMM (1 << 2) As far as I understand, the last two are enabling SSE and AVX. Maybe we name them respectively, instead of XMM and YMM? > + > #endif /* __PLAT_CMN_X86_CPU_DEFS_H__ */ > diff --git a/plat/kvm/x86/entry64.S b/plat/kvm/x86/entry64.S > index dc3614a2..3fde22ea 100644 > --- a/plat/kvm/x86/entry64.S > +++ b/plat/kvm/x86/entry64.S > @@ -172,15 +172,57 @@ ENTRY(_libkvmplat_start64) > movq $bootstack, %rsp > xorq %rbp, %rbp > > - /* enable FPU and SSE units */ > - movq %cr0, %rax > - andq $(~X86_CR0_EM), %rax > - orq $(X86_CR0_MP | X86_CR0_NE), %rax > - movq %rax, %cr0 > - movq %cr4, %rax > - orq $(X86_CR4_OSXMMEXCPT | X86_CR4_OSFXSR), %rax > - movq %rax, %cr4 > + /* We will work on cr0 and cr4 multiple times. > + * We put cr0 into rsi and cr4 into rdi, because cpuid and > + * xgetbv/xsetbv work on eax/ebx/ecx/edx. */ > + movq %cr0, %rsi > + movq %cr4, %rdi > + /* FPU and SSE are part of base x86-64, so no need to check for their > + * availability before enabling and initializing. */ > + andl $(~(X86_CR0_EM | X86_CR0_TS)), %esi > + orl $(X86_CR0_MP | X86_CR0_NE), %esi > + movq %rsi, %cr0 > + fninit So the floating point is enabled always? Should it be rather be a config option? Not insisting, just asking. > +#if __SSE__ > + orl $(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT), %edi > + movq %rdi, %cr4 > ldmxcsr (mxcsr_ptr) > +#endif /* __SSE__ */ > + /* Check capabilities subject to availability as indicated by cpuid. > + * First, start off with "standard features" */ > + movl $0x1, %eax > + cpuid > +#if __AVX__ > + /* ecx and edx now contain capability information, so we can now > + * enable capabilities based on the indicated features */ > + /* OSXSAVE needs to be enabled before AVX */ > + testl $(X86_CPUID1_ECX_XSAVE), %ecx > + jz noxsave > + orl $(X86_CR4_OSXSAVE), %edi > + movq %rdi, %cr4 > + /* now enable AVX. This needs to be last checking cpuid features from > + * the eax=1 cpuid call, because it clobbers ecx */ > + testl $(X86_CPUID1_ECX_AVX), %ecx > + jz noavx > + xorl %ecx, %ecx > + xgetbv > + orl $(X86_XCR0_XMM | X86_XCR0_YMM), %eax According to my very shallow understanding, XMM is SSE relating thing. Should its enabling be moved to the appropriate section above? Anyways, it probably a good idea to add a compilation error in case __AVX__ is defined, but __SSE__ is not. > + xsetbv > +noavx: > +noxsave: > +#endif /* __AVX__ */ > + /* Now, check for extended features. */ > + movl $0x7, %eax > + movl $0x1, %ecx > + cpuid > + /* ebx, ecx, edx now contain extended capabilties information. */ > + /* check for and enable FS/GSBASE */ > + testl $(X86_CPUID7_EBX_FSGSBASE), %ebx > + jz nofsgsbase > + orl $(X86_CR4_FSGSBASE), %edi > + movq %rdi, %cr4 > +nofsgsbase: > + /* done setting up CPU capabilities */ > > /* read multiboot info pointer */ > movq -8(%rsp), %rdi > diff --git a/plat/kvm/x86/setup.c b/plat/kvm/x86/setup.c > index e02886d1..47a78dcf 100644 > --- a/plat/kvm/x86/setup.c > +++ b/plat/kvm/x86/setup.c > @@ -109,20 +109,6 @@ static inline void _mb_init_mem(struct multiboot_info > *mi) > _libkvmplat_stack_top = (void *) (max_addr - __STACK_SIZE); > } > > -static inline void _init_cpufeatures(void) > -{ > -#if __SSE__ > - unsigned long sse_status = 0x1f80; > -#endif > - > - /* FPU */ > - asm volatile("fninit"); > - > -#if __SSE__ > - asm volatile("ldmxcsr %0" : : "m"(sse_status)); > -#endif > -} > - > static void _libkvmplat_entry2(void *arg __attribute__((unused))) > { > ukplat_entry_argp(NULL, cmdline, sizeof(cmdline)); > @@ -133,7 +119,6 @@ void _libkvmplat_entry(void *arg) > struct multiboot_info *mi = (struct multiboot_info *)arg; > > _libkvmplat_init_console(); > - _init_cpufeatures(); > traps_init(); > intctrl_init(); > > diff --git a/plat/xen/x86/entry64.S b/plat/xen/x86/entry64.S > index c266804a..4363ac0e 100644 > --- a/plat/xen/x86/entry64.S > +++ b/plat/xen/x86/entry64.S > @@ -25,6 +25,7 @@ > > #include <uk/arch/types.h> > #include <uk/arch/limits.h> > +#include <x86/cpu_defs.h> > #include <x86/traps.h> > #include <uk/config.h> > #include <xen/xen.h> > @@ -60,11 +61,68 @@ _libxenplat_start: > #include "entry_hvm.S" > > #endif > - cld > - movq stack_start(%rip),%rsp > - andq $(~(__STACK_SIZE-1)), %rsp > - movq %rsi,%rdi > - call _libxenplat_x86entry > + cld > + movq stack_start(%rip),%rsp > + andq $(~(__STACK_SIZE-1)), %rsp > + movq %rsi, %r8 /* esi contains pointer to start_info page */ > + /* We will work on cr0 and cr4 multiple times. > + * We put cr0 into rsi and cr4 into rdi, because cpuid and > + * xgetbv/xsetbv work on eax/ebx/ecx/edx. */ > + movq %cr0, %rsi > + movq %cr4, %rdi > + /* FPU and SSE are part of base x86-64, so no need to check for their > + * availability before enabling and initializing. */ > + andl $(~(X86_CR0_EM | X86_CR0_TS)), %esi > + orl $(X86_CR0_MP | X86_CR0_NE), %esi > + movq %rsi, %cr0 > + fninit > +#if __SSE__ > + orl $(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT), %edi > + movq %rdi, %cr4 > + ldmxcsr (mxcsr_ptr) > +#endif /* __SSE__ */ > + /* Check capabilities subject to availability as indicated by cpuid. > + * First, start off with "standard features" */ > + movl $0x1, %eax > + cpuid > +#if __AVX__ > + /* ecx and edx now contain capability information, so we can now > + * enable capabilities based on the indicated features */ > + /* OSXSAVE needs to be enabled before AVX */ > + testl $(X86_CPUID1_ECX_XSAVE), %ecx > + jz noxsave > + orl $(X86_CR4_OSXSAVE), %edi > + movq %rdi, %cr4 > + /* now enable AVX. This needs to be last checking cpuid features from > + * the eax=1 cpuid call, because it clobbers ecx */ > + testl $(X86_CPUID1_ECX_AVX), %ecx > + jz noavx > + xorl %ecx, %ecx > + xgetbv > + orl $(X86_XCR0_XMM | X86_XCR0_YMM), %eax > + xsetbv > +noavx: > +noxsave: > +#endif /* __AVX__ */ > + /* Now, check for extended features. */ > + movl $0x7, %eax > + movl $0x1, %ecx > + cpuid > + /* ebx, ecx, edx now contain extended capabilties information. */ > + /* check for and enable FS/GSBASE */ > + testl $(X86_CPUID7_EBX_FSGSBASE), %ebx > + jz nofsgsbase > + orl $(X86_CR4_FSGSBASE), %edi > + movq %rdi, %cr4 > +nofsgsbase: > + /* Done setting up CPU capabilities, hand over to C entry point. */ > + movq %r8, %rdi /* pass pointer to start_info page to C entry */ > + call _libxenplat_x86entry > + > +.type mxcsr_ptr, @object > +mxcsr_ptr: > + .long 0x1f80 /* Intel SDM power-on default */ > + > > stack_start: > .quad _libxenplat_bootstack + (2*__STACK_SIZE) > diff --git a/plat/xen/x86/setup.c b/plat/xen/x86/setup.c > index 35fdd35e..a41d5cb3 100644 > --- a/plat/xen/x86/setup.c > +++ b/plat/xen/x86/setup.c > @@ -113,20 +113,6 @@ static inline void _init_traps(void) > traps_init(); > } > > -static inline void _init_cpufeatures(void) > -{ > -#if __SSE__ > - unsigned long sse_status = 0x1f80; > -#endif > - > - /* FPU */ > - asm volatile("fninit"); > - > -#if __SSE__ > - asm volatile("ldmxcsr %0" : : "m"(sse_status)); > -#endif > -} > - > static inline void _init_shared_info(void) > { > int ret; > @@ -184,7 +170,6 @@ void _libxenplat_x86entry(void *start_info) __noreturn; > void _libxenplat_x86entry(void *start_info) > { > _init_traps(); > - _init_cpufeatures(); > HYPERVISOR_start_info = (start_info_t *)start_info; > _libxenplat_prepare_console(); /* enables buffering for console */ > > -- > 2.19.2 > -- Yuri Volchkov Software Specialist NEC Europe Ltd Kurfürsten-Anlage 36 D-69115 Heidelberg _______________________________________________ Minios-devel mailing list Minios-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/minios-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |