[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 04/17] x86emul: support F16C insns
On Wed, Jun 21, 2017 at 1:01 PM, Jan Beulich <JBeulich@xxxxxxxx> wrote: > Note that this avoids emulating the behavior of VCVTPS2PH found on at > least some Intel CPUs, which update MXCSR even when the memory write > faults. > > Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> > > --- a/tools/tests/x86_emulator/test_x86_emulator.c > +++ b/tools/tests/x86_emulator/test_x86_emulator.c > @@ -3028,6 +3028,47 @@ int main(int argc, char **argv) > printf("skipped\n"); > #endif > > + printf("%-40s", "Testing vcvtph2ps (%ecx),%ymm1..."); > + if ( stack_exec && cpu_has_f16c ) > + { > + decl_insn(vcvtph2ps); > + decl_insn(vcvtps2ph); > + > + asm volatile ( "vxorps %%xmm1, %%xmm1, %%xmm1\n" > + put_insn(vcvtph2ps, "vcvtph2ps (%0), %%ymm1") > + :: "c" (NULL) ); > + > + set_insn(vcvtph2ps); > + res[1] = 0x40003c00; /* (1.0, 2.0) */ > + res[2] = 0x44004200; /* (3.0, 4.0) */ > + res[3] = 0x3400b800; /* (-.5, .25) */ > + res[4] = 0xbc000000; /* (0.0, -1.) */ > + memset(res + 5, 0xff, 16); > + regs.ecx = (unsigned long)(res + 1); > + rc = x86_emulate(&ctxt, &emulops); > + asm volatile ( "vmovups %%ymm1, %0" : "=m" (res[16]) ); > + if ( rc != X86EMUL_OKAY || !check_eip(vcvtph2ps) ) > + goto fail; > + printf("okay\n"); > + > + printf("%-40s", "Testing vcvtps2ph $0,%ymm1,(%edx)..."); > + asm volatile ( "vmovups %0, %%ymm1\n" > + put_insn(vcvtps2ph, "vcvtps2ph $0, %%ymm1, (%1)") > + :: "m" (res[16]), "d" (NULL) ); > + > + set_insn(vcvtps2ph); > + memset(res + 7, 0, 32); > + regs.edx = (unsigned long)(res + 7); > + rc = x86_emulate(&ctxt, &emulops); > + if ( rc != X86EMUL_OKAY || !check_eip(vcvtps2ph) || > + memcmp(res + 1, res + 7, 16) || > + res[11] || res[12] || res[13] || res[14] ) > + goto fail; > + printf("okay\n"); > + } > + else > + printf("skipped\n"); > + > #undef decl_insn > #undef put_insn > #undef set_insn > --- a/tools/tests/x86_emulator/x86_emulate.h > +++ b/tools/tests/x86_emulator/x86_emulate.h > @@ -127,6 +127,14 @@ static inline uint64_t xgetbv(uint32_t x > (res.c & (1U 
<< 28)) != 0; \ > }) > > +#define cpu_has_f16c ({ \ > + struct cpuid_leaf res; \ > + emul_test_cpuid(1, 0, &res, NULL); \ > + if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ > + res.c = 0; \ > + (res.c & (1U << 29)) != 0; \ > +}) > + > #define cpu_has_avx2 ({ \ > struct cpuid_leaf res; \ > emul_test_cpuid(1, 0, &res, NULL); \ > --- a/xen/arch/x86/x86_emulate/x86_emulate.c > +++ b/xen/arch/x86/x86_emulate/x86_emulate.c > @@ -369,6 +369,7 @@ static const struct { > [0x00 ... 0x0b] = { .simd_size = simd_packed_int }, > [0x0c ... 0x0f] = { .simd_size = simd_packed_fp }, > [0x10] = { .simd_size = simd_packed_int }, > + [0x13] = { .simd_size = simd_other, .two_op = 1 }, > [0x14 ... 0x15] = { .simd_size = simd_packed_fp }, > [0x17] = { .simd_size = simd_packed_int, .two_op = 1 }, > [0x18 ... 0x19] = { .simd_size = simd_scalar_fp, .two_op = 1 }, > @@ -411,6 +412,7 @@ static const struct { > [0x14 ... 0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1 }, > [0x18] = { .simd_size = simd_128 }, > [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 }, > + [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1 }, > [0x20] = { .simd_size = simd_none }, > [0x21] = { .simd_size = simd_other }, > [0x22] = { .simd_size = simd_none }, > @@ -1601,6 +1603,7 @@ static bool vcpu_has( > #define vcpu_has_popcnt() vcpu_has( 1, ECX, 23, ctxt, ops) > #define vcpu_has_aesni() vcpu_has( 1, ECX, 25, ctxt, ops) > #define vcpu_has_avx() vcpu_has( 1, ECX, 28, ctxt, ops) > +#define vcpu_has_f16c() vcpu_has( 1, ECX, 29, ctxt, ops) > #define vcpu_has_rdrand() vcpu_has( 1, ECX, 30, ctxt, ops) > #define vcpu_has_mmxext() (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \ > vcpu_has_sse()) > @@ -7216,6 +7219,12 @@ x86_emulate( > host_and_vcpu_must_have(sse4_1); > goto simd_0f38_common; > > + case X86EMUL_OPC_VEX_66(0x0f38, 0x13): /* vcvtph2ps xmm/mem,{x,y}mm */ > + generate_exception_if(vex.w, EXC_UD); > + host_and_vcpu_must_have(f16c); > + op_bytes = 8 << vex.l; > + goto 
simd_0f_ymm; > + > case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */ > case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */ > case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */ > @@ -7607,6 +7616,50 @@ x86_emulate( > opc = init_prefixes(stub); > goto pextr; > > + case X86EMUL_OPC_VEX_66(0x0f3a, 0x1d): /* vcvtps2ph > $imm8,{x,y}mm,xmm/mem */ On the whole this stanza looks plausible; just a few comments... > + { > + uint32_t mxcsr; > + > + generate_exception_if(vex.w || vex.reg != 0xf, EXC_UD); > + host_and_vcpu_must_have(f16c); > + fail_if(!ops->write); > + > + opc = init_prefixes(stub); > + opc[0] = b; > + opc[1] = modrm; > + if ( ea.type == OP_MEM ) > + { > + /* Convert memory operand to (%rAX). */ > + vex.b = 1; > + opc[1] &= 0x38; > + } First of all, I'm not a fan of modifying the actual decoded vex prefix here. I realize that as it happens vex.b won't be read again; but it just seems like risky behavior. Would it make more sense to make a copy of vex and modify that? Secondly, the logic here is confusing: you first copy the decoded modrm byte, then modify one bit of the decoded vex prefix, then modify the copied modrm byte, then (below) copy the (potentially) modified vex prefix. It seems like it would be a lot more clear to have the if before the init_prefixes, and have it look like this: if ( ea.type == OP_MEM ) { /* Convert blah */ vex.b = 1; modrm &= 0x38 } (Or lvex and lmodrm, which are local variables containing copies of the decoded vex prefix and modrm byte.) > + opc[2] = imm1; > + fic.insn_bytes = PFX_BYTES + 3; > + opc[3] = 0xc3; Open-coding this 'ret' makes this harder to parse. > + > + copy_VEX(opc, vex); If we move the vex & modrm adjustment above the init_prefixes(), then this can go right after init_prefixes(); that way the bytes are all basically set in order. 
At very least this logically goes with the other "set up opc" code, so if left here the blank line should be after this rather than before this. More later. :-) -George _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our servers 24x7x365 and backed by RackSpace's Fanatical Support. |