[Xen-devel] Ping: [PATCH 1/6] x86emul: generalize wbinvd() hook
On 01.07.2019 13:55, Jan Beulich wrote:
> The hook is already in use for other purposes, and emulating e.g.
> CLFLUSH by issuing WBINVD is, well, not very nice. Rename the hook and
> add parameters. Use lighter weight flushing insns when possible in
> hvmemul_cache_op().
>
> hvmemul_cache_op() treating x86emul_invd the same as x86emul_wbinvd is
> to retain original behavior, but I'm not sure this is what we want in
> the long run.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

Paul, any chance I could get your ack (or otherwise) here? I thought I did
answer the one question you had raised to your satisfaction.

Thanks, Jan

> ---
> v2: Use cache_op() as hook name. Convert macros to inline functions in
>     system.h. Re-base.
> ---
> I was unsure about PREFETCH* and CLDEMOTE - both are cache management
> insns too, but the emulator currently treats them as a NOP without
> invoking any hooks.
> I was also uncertain about the new cache_flush_permitted() instance -
> generally I think it wouldn't be too bad if we allowed line flushes in
> all cases, in which case the checks in the ->wbinvd_intercept() handlers
> would suffice (as they did until now).
>
> --- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> +++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
> @@ -382,10 +382,13 @@ static int fuzz_invlpg(
>      return maybe_fail(ctxt, "invlpg", false);
>  }
>  
> -static int fuzz_wbinvd(
> +static int fuzz_cache_op(
> +    enum x86emul_cache_op op,
> +    enum x86_segment seg,
> +    unsigned long offset,
>      struct x86_emulate_ctxt *ctxt)
>  {
> -    return maybe_fail(ctxt, "wbinvd", true);
> +    return maybe_fail(ctxt, "cache-management", true);
>  }
>  
>  static int fuzz_write_io(
> @@ -620,7 +623,7 @@ static const struct x86_emulate_ops all_
>      SET(read_xcr),
>      SET(read_msr),
>      SET(write_msr),
> -    SET(wbinvd),
> +    SET(cache_op),
>      SET(invlpg),
>      .get_fpu = emul_test_get_fpu,
>      .put_fpu = emul_test_put_fpu,
> @@ -729,7 +732,7 @@ enum {
>      HOOK_read_xcr,
>      HOOK_read_msr,
>      HOOK_write_msr,
> -    HOOK_wbinvd,
> +    HOOK_cache_op,
>      HOOK_cpuid,
>      HOOK_inject_hw_exception,
>      HOOK_inject_sw_interrupt,
> @@ -773,7 +776,7 @@ static void disable_hooks(struct x86_emu
>      MAYBE_DISABLE_HOOK(read_xcr);
>      MAYBE_DISABLE_HOOK(read_msr);
>      MAYBE_DISABLE_HOOK(write_msr);
> -    MAYBE_DISABLE_HOOK(wbinvd);
> +    MAYBE_DISABLE_HOOK(cache_op);
>      MAYBE_DISABLE_HOOK(cpuid);
>      MAYBE_DISABLE_HOOK(get_fpu);
>      MAYBE_DISABLE_HOOK(invlpg);
> --- a/xen/arch/x86/Rules.mk
> +++ b/xen/arch/x86/Rules.mk
> @@ -19,7 +19,9 @@ $(call as-option-add,CFLAGS,CC,"crc32 %e
>  $(call as-option-add,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_AS_EPT)
>  $(call as-option-add,CFLAGS,CC,"rdrand %eax",-DHAVE_AS_RDRAND)
>  $(call as-option-add,CFLAGS,CC,"rdfsbase %rax",-DHAVE_AS_FSGSBASE)
> +$(call as-option-add,CFLAGS,CC,"xsaveopt (%rax)",-DHAVE_AS_XSAVEOPT)
>  $(call as-option-add,CFLAGS,CC,"rdseed %eax",-DHAVE_AS_RDSEED)
> +$(call as-option-add,CFLAGS,CC,"clwb (%rax)",-DHAVE_AS_CLWB)
>  $(call as-option-add,CFLAGS,CC,".equ \"x\"$$(comma)1", \
>                       -U__OBJECT_LABEL__ -DHAVE_AS_QUOTED_SYM \
>                       '-D__OBJECT_LABEL__=$(subst $(BASEDIR)/,,$(CURDIR))/$$@')
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -25,6 +25,7 @@
>  #include <asm/hvm/trace.h>
>  #include <asm/hvm/support.h>
>  #include <asm/hvm/svm/svm.h>
> +#include <asm/iocap.h>
>  #include <asm/vm_event.h>
>  
>  static void hvmtrace_io_assist(const ioreq_t *p)
> @@ -555,16 +556,12 @@ static void *hvmemul_map_linear_addr(
>      mfn_t *mfn = &hvmemul_ctxt->mfn[0];
>  
>      /*
> -     * The caller has no legitimate reason for trying a zero-byte write, but
> -     * all other code here is written to work if the check below was dropped.
> -     *
> -     * The maximum write size depends on the number of adjacent mfns[] which
> +     * The maximum access size depends on the number of adjacent mfns[] which
>       * can be vmap()'d, accouting for possible misalignment within the region.
>       * The higher level emulation callers are responsible for ensuring that
> -     * mfns[] is large enough for the requested write size.
> +     * mfns[] is large enough for the requested access size.
>       */
> -    if ( bytes == 0 ||
> -         nr_frames > ARRAY_SIZE(hvmemul_ctxt->mfn) )
> +    if ( nr_frames > ARRAY_SIZE(hvmemul_ctxt->mfn) )
>      {
>          ASSERT_UNREACHABLE();
>          goto unhandleable;
> @@ -669,8 +666,6 @@ static void hvmemul_unmap_linear_addr(
>      unsigned int i;
>      mfn_t *mfn = &hvmemul_ctxt->mfn[0];
>  
> -    ASSERT(bytes > 0);
> -
>      if ( nr_frames == 1 )
>          unmap_domain_page(mapping);
>      else
> @@ -1473,7 +1468,10 @@ static int hvmemul_write_msr_discard(
>      return X86EMUL_OKAY;
>  }
>  
> -static int hvmemul_wbinvd_discard(
> +static int hvmemul_cache_op_discard(
> +    enum x86emul_cache_op op,
> +    enum x86_segment seg,
> +    unsigned long offset,
>      struct x86_emulate_ctxt *ctxt)
>  {
>      return X86EMUL_OKAY;
> @@ -2149,10 +2147,65 @@ static int hvmemul_write_msr(
>      return rc;
>  }
>  
> -static int hvmemul_wbinvd(
> +static int hvmemul_cache_op(
> +    enum x86emul_cache_op op,
> +    enum x86_segment seg,
> +    unsigned long offset,
>      struct x86_emulate_ctxt *ctxt)
>  {
> -    alternative_vcall(hvm_funcs.wbinvd_intercept);
> +    struct hvm_emulate_ctxt *hvmemul_ctxt =
> +        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> +    unsigned long addr, reps = 1;
> +    uint32_t pfec = PFEC_page_present;
> +    int rc;
> +    void *mapping;
> +
> +    if ( !cache_flush_permitted(current->domain) )
> +        return X86EMUL_OKAY;
> +
> +    switch ( op )
> +    {
> +    case x86emul_clflush:
> +    case x86emul_clflushopt:
> +    case x86emul_clwb:
> +        ASSERT(!is_x86_system_segment(seg));
> +
> +        rc = hvmemul_virtual_to_linear(seg, offset, 0, &reps,
> +                                       hvm_access_read, hvmemul_ctxt, &addr);
> +        if ( rc != X86EMUL_OKAY )
> +            break;
> +
> +        if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
> +            pfec |= PFEC_user_mode;
> +
> +        mapping = hvmemul_map_linear_addr(addr, 0, pfec, hvmemul_ctxt,
> +                                          current->arch.hvm.data_cache);
> +        if ( mapping == ERR_PTR(~X86EMUL_EXCEPTION) )
> +            return X86EMUL_EXCEPTION;
> +        if ( IS_ERR_OR_NULL(mapping) )
> +            break;
> +
> +        if ( cpu_has_clflush )
> +        {
> +            if ( op == x86emul_clwb && cpu_has_clwb )
> +                clwb(mapping);
> +            else if ( op == x86emul_clflushopt && cpu_has_clflushopt )
> +                clflushopt(mapping);
> +            else
> +                clflush(mapping);
> +
> +            hvmemul_unmap_linear_addr(mapping, addr, 0, hvmemul_ctxt);
> +            break;
> +        }
> +
> +        hvmemul_unmap_linear_addr(mapping, addr, 0, hvmemul_ctxt);
> +        /* fall through */
> +    case x86emul_invd:
> +    case x86emul_wbinvd:
> +        alternative_vcall(hvm_funcs.wbinvd_intercept);
> +        break;
> +    }
> +
>      return X86EMUL_OKAY;
>  }
>  
> @@ -2353,7 +2406,7 @@ static const struct x86_emulate_ops hvm_
>      .write_xcr     = hvmemul_write_xcr,
>      .read_msr      = hvmemul_read_msr,
>      .write_msr     = hvmemul_write_msr,
> -    .wbinvd        = hvmemul_wbinvd,
> +    .cache_op      = hvmemul_cache_op,
>      .cpuid         = x86emul_cpuid,
>      .get_fpu       = hvmemul_get_fpu,
>      .put_fpu       = hvmemul_put_fpu,
> @@ -2380,7 +2433,7 @@ static const struct x86_emulate_ops hvm_
>      .write_xcr     = hvmemul_write_xcr,
>      .read_msr      = hvmemul_read_msr,
>      .write_msr     = hvmemul_write_msr_discard,
> -    .wbinvd        = hvmemul_wbinvd_discard,
> +    .cache_op      = hvmemul_cache_op_discard,
>      .cpuid         = x86emul_cpuid,
>      .get_fpu       = hvmemul_get_fpu,
>      .put_fpu       = hvmemul_put_fpu,
> --- a/xen/arch/x86/pv/emul-priv-op.c
> +++ b/xen/arch/x86/pv/emul-priv-op.c
> @@ -1118,9 +1118,11 @@ static int write_msr(unsigned int reg, u
>      return X86EMUL_UNHANDLEABLE;
>  }
>  
> -/* Name it differently to avoid clashing with wbinvd() */
> -static int _wbinvd(struct x86_emulate_ctxt *ctxt)
> +static int cache_op(enum x86emul_cache_op op, enum x86_segment seg,
> +                    unsigned long offset, struct x86_emulate_ctxt *ctxt)
>  {
> +    ASSERT(op == x86emul_wbinvd);
> +
>      /* Ignore the instruction if unprivileged. */
>      if ( !cache_flush_permitted(current->domain) )
>          /*
> @@ -1238,7 +1240,7 @@ static const struct x86_emulate_ops priv
>      .read_msr            = read_msr,
>      .write_msr           = write_msr,
>      .cpuid               = x86emul_cpuid,
> -    .wbinvd              = _wbinvd,
> +    .cache_op            = cache_op,
>  };
>  
>  int pv_emulate_privileged_op(struct cpu_user_regs *regs)
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -5933,8 +5933,11 @@ x86_emulate(
>      case X86EMUL_OPC(0x0f, 0x08): /* invd */
>      case X86EMUL_OPC(0x0f, 0x09): /* wbinvd */
>          generate_exception_if(!mode_ring0(), EXC_GP, 0);
> -        fail_if(ops->wbinvd == NULL);
> -        if ( (rc = ops->wbinvd(ctxt)) != 0 )
> +        fail_if(!ops->cache_op);
> +        if ( (rc = ops->cache_op(b == 0x09 ? x86emul_wbinvd
> +                                           : x86emul_invd,
> +                                 x86_seg_none, 0,
> +                                 ctxt)) != X86EMUL_OKAY )
>              goto done;
>          break;
>  
> @@ -7801,8 +7804,9 @@ x86_emulate(
>              /* else clwb */
>              fail_if(!vex.pfx);
>              vcpu_must_have(clwb);
> -            fail_if(!ops->wbinvd);
> -            if ( (rc = ops->wbinvd(ctxt)) != X86EMUL_OKAY )
> +            fail_if(!ops->cache_op);
> +            if ( (rc = ops->cache_op(x86emul_clwb, ea.mem.seg, ea.mem.off,
> +                                     ctxt)) != X86EMUL_OKAY )
>                  goto done;
>              break;
>          case 7:
> @@ -7818,8 +7822,11 @@ x86_emulate(
>                  vcpu_must_have(clflush);
>              else
>                  vcpu_must_have(clflushopt);
> -            fail_if(ops->wbinvd == NULL);
> -            if ( (rc = ops->wbinvd(ctxt)) != 0 )
> +            fail_if(!ops->cache_op);
> +            if ( (rc = ops->cache_op(vex.pfx ? x86emul_clflushopt
> +                                             : x86emul_clflush,
> +                                     ea.mem.seg, ea.mem.off,
> +                                     ctxt)) != X86EMUL_OKAY )
>                  goto done;
>              break;
>          default:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -176,6 +176,14 @@ enum x86_emulate_fpu_type {
>      X86EMUL_FPU_none
>  };
>  
> +enum x86emul_cache_op {
> +    x86emul_clflush,
> +    x86emul_clflushopt,
> +    x86emul_clwb,
> +    x86emul_invd,
> +    x86emul_wbinvd,
> +};
> +
>  struct x86_emulate_state;
>  
>  /*
> @@ -452,8 +460,15 @@ struct x86_emulate_ops
>          uint64_t val,
>          struct x86_emulate_ctxt *ctxt);
>  
> -    /* wbinvd: Write-back and invalidate cache contents. */
> -    int (*wbinvd)(
> +    /*
> +     * cache_op: Write-back and/or invalidate cache contents.
> +     *
> +     * @seg:@offset applicable only to some of enum x86emul_cache_op.
> +     */
> +    int (*cache_op)(
> +        enum x86emul_cache_op op,
> +        enum x86_segment seg,
> +        unsigned long offset,
>          struct x86_emulate_ctxt *ctxt);
>  
>      /* cpuid: Emulate CPUID via given set of EAX-EDX inputs/outputs. */
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -102,6 +102,8 @@
>  #define cpu_has_rdseed          boot_cpu_has(X86_FEATURE_RDSEED)
>  #define cpu_has_smap            boot_cpu_has(X86_FEATURE_SMAP)
>  #define cpu_has_avx512_ifma     boot_cpu_has(X86_FEATURE_AVX512_IFMA)
> +#define cpu_has_clflushopt      boot_cpu_has(X86_FEATURE_CLFLUSHOPT)
> +#define cpu_has_clwb            boot_cpu_has(X86_FEATURE_CLWB)
>  #define cpu_has_avx512er        boot_cpu_has(X86_FEATURE_AVX512ER)
>  #define cpu_has_avx512cd        boot_cpu_has(X86_FEATURE_AVX512CD)
>  #define cpu_has_sha             boot_cpu_has(X86_FEATURE_SHA)
> --- a/xen/include/asm-x86/system.h
> +++ b/xen/include/asm-x86/system.h
> @@ -21,6 +21,23 @@ static inline void clflush(const void *p
>      asm volatile ( "clflush %0" :: "m" (*(const char *)p) );
>  }
>  
> +static inline void clflushopt(const void *p)
> +{
> +    asm volatile ( "data16 clflush %0" :: "m" (*(const char *)p) );
> +}
> +
> +static inline void clwb(const void *p)
> +{
> +#if defined(HAVE_AS_CLWB)
> +    asm volatile ( "clwb %0" :: "m" (*(const char *)p) );
> +#elif defined(HAVE_AS_XSAVEOPT)
> +    asm volatile ( "data16 xsaveopt %0" :: "m" (*(const char *)p) );
> +#else
> +    asm volatile ( ".byte 0x66, 0x0f, 0xae, 0x32"
> +                   :: "d" (p), "m" (*(const char *)p) );
> +#endif
> +}
> +
>  #define xchg(ptr,v) \
>      ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
>

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
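[Editorial note, not part of the posted patch: for readers following the interface change, below is a minimal, hypothetical sketch of what an x86_emulate_ops consumer's cache_op() hook can look like under the new signature. The dispatch mirrors hvmemul_cache_op() in the patch; example_flush_line() and example_flush_all() are placeholder names, not real Xen functions.]

/* Hypothetical consumer hook -- illustration only, not part of the patch. */
static int example_cache_op(
    enum x86emul_cache_op op,
    enum x86_segment seg,
    unsigned long offset,
    struct x86_emulate_ctxt *ctxt)
{
    switch ( op )
    {
    case x86emul_clflush:
    case x86emul_clflushopt:
    case x86emul_clwb:
        /* Line-granular operations: seg:offset identifies the affected line. */
        return example_flush_line(seg, offset, ctxt);   /* placeholder helper */

    case x86emul_invd:
    case x86emul_wbinvd:
        /* Whole-cache operations: the emulator passes x86_seg_none / 0 here. */
        return example_flush_all(ctxt);                 /* placeholder helper */
    }

    return X86EMUL_UNHANDLEABLE;
}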