x86: fix FS/GS base handling when using the fsgsbase feature In that case, due to the respective instructions not being privileged, we can't rely on our in-memory data to always be correct: While the guest is running, it may change without us knowing about it. Therefore we need to - read the correct values from hardware during context switch out (save_segments()) - read the correct values from hardware during RDMSR emulation - update in-memory values during guest mode change (toggle_guest_mode()) For completeness/consistency, WRMSR emulation is also being switched to use wr[fg]sbase(). Signed-off-by: Jan Beulich --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1251,6 +1251,15 @@ static void save_segments(struct vcpu *v regs->fs = read_segment_register(fs); regs->gs = read_segment_register(gs); + if ( cpu_has_fsgsbase && !is_pv_32bit_vcpu(v) ) + { + v->arch.pv_vcpu.fs_base = __rdfsbase(); + if ( v->arch.flags & TF_kernel_mode ) + v->arch.pv_vcpu.gs_base_kernel = __rdgsbase(); + else + v->arch.pv_vcpu.gs_base_user = __rdgsbase(); + } + if ( regs->ds ) dirty_segment_mask |= DIRTY_DS; --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -2382,15 +2382,13 @@ static int emulate_privileged_op(struct case MSR_FS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - if ( wrmsr_safe(MSR_FS_BASE, msr_content) ) - goto fail; + wrfsbase(msr_content); v->arch.pv_vcpu.fs_base = msr_content; break; case MSR_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - if ( wrmsr_safe(MSR_GS_BASE, msr_content) ) - goto fail; + wrgsbase(msr_content); v->arch.pv_vcpu.gs_base_kernel = msr_content; break; case MSR_SHADOW_GS_BASE: @@ -2535,15 +2533,14 @@ static int emulate_privileged_op(struct case MSR_FS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - regs->eax = v->arch.pv_vcpu.fs_base & 0xFFFFFFFFUL; - regs->edx = v->arch.pv_vcpu.fs_base >> 32; - break; + val = cpu_has_fsgsbase ? __rdfsbase() : v->arch.pv_vcpu.fs_base; + goto rdmsr_writeback; case MSR_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - regs->eax = v->arch.pv_vcpu.gs_base_kernel & 0xFFFFFFFFUL; - regs->edx = v->arch.pv_vcpu.gs_base_kernel >> 32; - break; + val = cpu_has_fsgsbase ? __rdgsbase() + : v->arch.pv_vcpu.gs_base_kernel; + goto rdmsr_writeback; case MSR_SHADOW_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c @@ -257,6 +257,13 @@ void toggle_guest_mode(struct vcpu *v) { if ( is_pv_32bit_vcpu(v) ) return; + if ( cpu_has_fsgsbase ) + { + if ( v->arch.flags & TF_kernel_mode ) + v->arch.pv_vcpu.gs_base_kernel = __rdgsbase(); + else + v->arch.pv_vcpu.gs_base_user = __rdgsbase(); + } v->arch.flags ^= TF_kernel_mode; asm volatile ( "swapgs" ); update_cr3(v); --- a/xen/include/asm-x86/msr.h +++ b/xen/include/asm-x86/msr.h @@ -98,34 +98,52 @@ static inline int wrmsr_safe(unsigned in : "=a" (low), "=d" (high) \ : "c" (counter)) -static inline unsigned long rdfsbase(void) +static inline unsigned long __rdfsbase(void) { unsigned long base; - if ( cpu_has_fsgsbase ) #ifdef HAVE_GAS_FSGSBASE - asm volatile ( "rdfsbase %0" : "=r" (base) ); + asm volatile ( "rdfsbase %0" : "=r" (base) ); #else - asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) ); + asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) ); #endif - else - rdmsrl(MSR_FS_BASE, base); return base; } -static inline unsigned long rdgsbase(void) +static inline unsigned long __rdgsbase(void) { unsigned long base; - if ( cpu_has_fsgsbase ) #ifdef HAVE_GAS_FSGSBASE - asm volatile ( "rdgsbase %0" : "=r" (base) ); + asm volatile ( "rdgsbase %0" : "=r" (base) ); #else - asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) ); + asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) ); #endif - else - rdmsrl(MSR_GS_BASE, base); + + return base; +} + +static inline unsigned long rdfsbase(void) +{ + unsigned long base; + + if ( cpu_has_fsgsbase ) + return __rdfsbase(); + + rdmsrl(MSR_FS_BASE, base); + + return base; +} + +static inline unsigned long rdgsbase(void) +{ + unsigned long base; + + if ( cpu_has_fsgsbase ) + return __rdgsbase(); + + rdmsrl(MSR_GS_BASE, base); return base; }