x86: use {rd,wr}{fs,gs}base when available ... as being intended to be faster than MSR reads/writes. In the case of emulate_privileged_op() also use these in favor of the cached (but possibly stale) addresses from arch.pv_vcpu. This allows entirely removing the code that was the subject of XSA-67. Signed-off-by: Jan Beulich --- a/xen/arch/x86/Rules.mk +++ b/xen/arch/x86/Rules.mk @@ -31,6 +31,7 @@ $(call cc-options-add,CFLAGS,CC,$(EMBEDD $(call cc-option-add,CFLAGS,CC,-Wnested-externs) $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX) $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT) +$(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE) ifeq ($(supervisor_mode_kernel),y) CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1 --- a/xen/arch/x86/acpi/suspend.c +++ b/xen/arch/x86/acpi/suspend.c @@ -27,8 +27,8 @@ void save_rest_processor_state(void) asm volatile ( "movw %%ds,(%0); movw %%es,2(%0); movw %%fs,4(%0); movw %%gs,6(%0)" : : "r" (saved_segs) : "memory" ); - rdmsrl(MSR_FS_BASE, saved_fs_base); - rdmsrl(MSR_GS_BASE, saved_gs_base); + saved_fs_base = rdfsbase(); + saved_gs_base = rdgsbase(); rdmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base); rdmsrl(MSR_CSTAR, saved_cstar); rdmsrl(MSR_LSTAR, saved_lstar); @@ -56,8 +56,8 @@ void restore_rest_processor_state(void) X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_TF, 0U); - wrmsrl(MSR_FS_BASE, saved_fs_base); - wrmsrl(MSR_GS_BASE, saved_gs_base); + wrfsbase(saved_fs_base); + wrgsbase(saved_gs_base); wrmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base); if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL || --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1092,7 +1092,7 @@ static void load_segments(struct vcpu *n { /* This can only be non-zero if selector is NULL. */ if ( n->arch.pv_vcpu.fs_base ) - wrmsrl(MSR_FS_BASE, n->arch.pv_vcpu.fs_base); + wrfsbase(n->arch.pv_vcpu.fs_base); /* Most kernels have non-zero GS base, so don't bother testing. */ /* (This is also a serialising instruction, avoiding AMD erratum #88.) */ @@ -1100,7 +1100,7 @@ static void load_segments(struct vcpu *n /* This can only be non-zero if selector is NULL. */ if ( n->arch.pv_vcpu.gs_base_user ) - wrmsrl(MSR_GS_BASE, n->arch.pv_vcpu.gs_base_user); + wrgsbase(n->arch.pv_vcpu.gs_base_user); /* If in kernel mode then switch the GS bases around. */ if ( (n->arch.flags & TF_kernel_mode) ) --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -1264,6 +1264,9 @@ void __init __start_xen(unsigned long mb if ( cpu_has_smep ) set_in_cr4(X86_CR4_SMEP); + if ( cpu_has_fsgsbase ) + set_in_cr4(X86_CR4_FSGSBASE); + local_irq_enable(); pt_pci_init(); --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -1985,28 +1985,18 @@ static int emulate_privileged_op(struct } else { - if ( lm_ovr == lm_seg_none || data_sel < 4 ) + switch ( lm_ovr ) { - switch ( lm_ovr ) - { - case lm_seg_none: - data_base = 0UL; - break; - case lm_seg_fs: - data_base = v->arch.pv_vcpu.fs_base; - break; - case lm_seg_gs: - if ( guest_kernel_mode(v, regs) ) - data_base = v->arch.pv_vcpu.gs_base_kernel; - else - data_base = v->arch.pv_vcpu.gs_base_user; - break; - } + default: + data_base = 0UL; + break; + case lm_seg_fs: + data_base = rdfsbase(); + break; + case lm_seg_gs: + data_base = rdgsbase(); + break; } - else if ( !read_descriptor(data_sel, v, regs, - &data_base, &data_limit, &ar, 0) || - !(ar & _SEGMENT_S) || !(ar & _SEGMENT_P) ) - goto fail; data_limit = ~0UL; ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P; } --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -457,12 +457,12 @@ unsigned long pv_guest_cr4_fixup(const s (((v)->arch.pv_vcpu.ctrlreg[4] \ | (mmu_cr4_features \ & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP | \ - X86_CR4_OSXSAVE)) \ + X86_CR4_OSXSAVE | X86_CR4_FSGSBASE)) \ | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)) \ & ~X86_CR4_DE) #define real_cr4_to_pv_guest_cr4(c) \ - ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD \ - | X86_CR4_OSXSAVE | X86_CR4_SMEP)) + ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \ + X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE)) void domain_cpuid(struct domain *d, unsigned int input, --- a/xen/include/asm-x86/msr.h +++ b/xen/include/asm-x86/msr.h @@ -9,6 +9,7 @@ #include #include #include +#include #define rdmsr(msr,val1,val2) \ __asm__ __volatile__("rdmsr" \ @@ -97,6 +98,61 @@ static inline int wrmsr_safe(unsigned in : "=a" (low), "=d" (high) \ : "c" (counter)) +static inline unsigned long rdfsbase(void) +{ + unsigned long base; + + if ( cpu_has_fsgsbase ) +#ifdef HAVE_GAS_FSGSBASE + asm volatile ( "rdfsbase %0" : "=r" (base) ); +#else + asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) ); +#endif + else + rdmsrl(MSR_FS_BASE, base); + + return base; +} + +static inline unsigned long rdgsbase(void) +{ + unsigned long base; + + if ( cpu_has_fsgsbase ) +#ifdef HAVE_GAS_FSGSBASE + asm volatile ( "rdgsbase %0" : "=r" (base) ); +#else + asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) ); +#endif + else + rdmsrl(MSR_GS_BASE, base); + + return base; +} + +static inline void wrfsbase(unsigned long base) +{ + if ( cpu_has_fsgsbase ) +#ifdef HAVE_GAS_FSGSBASE + asm volatile ( "wrfsbase %0" :: "r" (base) ); +#else + asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd0" :: "a" (base) ); +#endif + else + wrmsrl(MSR_FS_BASE, base); +} + +static inline void wrgsbase(unsigned long base) +{ + if ( cpu_has_fsgsbase ) +#ifdef HAVE_GAS_FSGSBASE + asm volatile ( "wrgsbase %0" :: "r" (base) ); +#else + asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8" :: "a" (base) ); +#endif + else + wrmsrl(MSR_GS_BASE, base); +} DECLARE_PER_CPU(u64, efer); u64 read_efer(void);