[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 04/17] x86emul: support F16C insns



On Wed, Jun 21, 2017 at 1:01 PM, Jan Beulich <JBeulich@xxxxxxxx> wrote:
> Note that this avoids emulating the behavior of VCVTPS2PH found on at
> least some Intel CPUs, which update MXCSR even when the memory write
> faults.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>
> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -3028,6 +3028,47 @@ int main(int argc, char **argv)
>          printf("skipped\n");
>  #endif
>
> +    printf("%-40s", "Testing vcvtph2ps (%ecx),%ymm1...");
> +    if ( stack_exec && cpu_has_f16c )
> +    {
> +        decl_insn(vcvtph2ps);
> +        decl_insn(vcvtps2ph);
> +
> +        asm volatile ( "vxorps %%xmm1, %%xmm1, %%xmm1\n"
> +                       put_insn(vcvtph2ps, "vcvtph2ps (%0), %%ymm1")
> +                       :: "c" (NULL) );
> +
> +        set_insn(vcvtph2ps);
> +        res[1] = 0x40003c00; /* (1.0, 2.0) */
> +        res[2] = 0x44004200; /* (3.0, 4.0) */
> +        res[3] = 0x3400b800; /* (-.5, .25) */
> +        res[4] = 0xbc000000; /* (0.0, -1.) */
> +        memset(res + 5, 0xff, 16);
> +        regs.ecx = (unsigned long)(res + 1);
> +        rc = x86_emulate(&ctxt, &emulops);
> +        asm volatile ( "vmovups %%ymm1, %0" : "=m" (res[16]) );
> +        if ( rc != X86EMUL_OKAY || !check_eip(vcvtph2ps) )
> +            goto fail;
> +        printf("okay\n");
> +
> +        printf("%-40s", "Testing vcvtps2ph $0,%ymm1,(%edx)...");
> +        asm volatile ( "vmovups %0, %%ymm1\n"
> +                       put_insn(vcvtps2ph, "vcvtps2ph $0, %%ymm1, (%1)")
> +                       :: "m" (res[16]), "d" (NULL) );
> +
> +        set_insn(vcvtps2ph);
> +        memset(res + 7, 0, 32);
> +        regs.edx = (unsigned long)(res + 7);
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( rc != X86EMUL_OKAY || !check_eip(vcvtps2ph) ||
> +             memcmp(res + 1, res + 7, 16) ||
> +             res[11] || res[12] || res[13] || res[14] )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
>  #undef decl_insn
>  #undef put_insn
>  #undef set_insn
> --- a/tools/tests/x86_emulator/x86_emulate.h
> +++ b/tools/tests/x86_emulator/x86_emulate.h
> @@ -127,6 +127,14 @@ static inline uint64_t xgetbv(uint32_t x
>      (res.c & (1U << 28)) != 0; \
>  })
>
> +#define cpu_has_f16c ({ \
> +    struct cpuid_leaf res; \
> +    emul_test_cpuid(1, 0, &res, NULL); \
> +    if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
> +        res.c = 0; \
> +    (res.c & (1U << 29)) != 0; \
> +})
> +
>  #define cpu_has_avx2 ({ \
>      struct cpuid_leaf res; \
>      emul_test_cpuid(1, 0, &res, NULL); \
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -369,6 +369,7 @@ static const struct {
>      [0x00 ... 0x0b] = { .simd_size = simd_packed_int },
>      [0x0c ... 0x0f] = { .simd_size = simd_packed_fp },
>      [0x10] = { .simd_size = simd_packed_int },
> +    [0x13] = { .simd_size = simd_other, .two_op = 1 },
>      [0x14 ... 0x15] = { .simd_size = simd_packed_fp },
>      [0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
>      [0x18 ... 0x19] = { .simd_size = simd_scalar_fp, .two_op = 1 },
> @@ -411,6 +412,7 @@ static const struct {
>      [0x14 ... 0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1 },
>      [0x18] = { .simd_size = simd_128 },
>      [0x19] = { .simd_size = simd_128, .to_mem = 1, .two_op = 1 },
> +    [0x1d] = { .simd_size = simd_other, .to_mem = 1, .two_op = 1 },
>      [0x20] = { .simd_size = simd_none },
>      [0x21] = { .simd_size = simd_other },
>      [0x22] = { .simd_size = simd_none },
> @@ -1601,6 +1603,7 @@ static bool vcpu_has(
>  #define vcpu_has_popcnt()      vcpu_has(         1, ECX, 23, ctxt, ops)
>  #define vcpu_has_aesni()       vcpu_has(         1, ECX, 25, ctxt, ops)
>  #define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
> +#define vcpu_has_f16c()        vcpu_has(         1, ECX, 29, ctxt, ops)
>  #define vcpu_has_rdrand()      vcpu_has(         1, ECX, 30, ctxt, ops)
>  #define vcpu_has_mmxext()     (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
>                                 vcpu_has_sse())
> @@ -7216,6 +7219,12 @@ x86_emulate(
>          host_and_vcpu_must_have(sse4_1);
>          goto simd_0f38_common;
>
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x13): /* vcvtph2ps xmm/mem,{x,y}mm */
> +        generate_exception_if(vex.w, EXC_UD);
> +        host_and_vcpu_must_have(f16c);
> +        op_bytes = 8 << vex.l;
> +        goto simd_0f_ymm;
> +
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */
> @@ -7607,6 +7616,50 @@ x86_emulate(
>          opc = init_prefixes(stub);
>          goto pextr;
>
> +    case X86EMUL_OPC_VEX_66(0x0f3a, 0x1d): /* vcvtps2ph $imm8,{x,y}mm,xmm/mem */

On the whole this stanza looks plausible; just a few comments...

> +    {
> +        uint32_t mxcsr;
> +
> +        generate_exception_if(vex.w || vex.reg != 0xf, EXC_UD);
> +        host_and_vcpu_must_have(f16c);
> +        fail_if(!ops->write);
> +
> +        opc = init_prefixes(stub);
> +        opc[0] = b;
> +        opc[1] = modrm;
> +        if ( ea.type == OP_MEM )
> +        {
> +            /* Convert memory operand to (%rAX). */
> +            vex.b = 1;
> +            opc[1] &= 0x38;
> +        }

First of all, I'm not a fan of modifying the actual decoded vex prefix
here.  I realize that as it happens vex.b won't be read again; but it
just seems like risky behavior.  Would it make more sense to make a
copy of vex and modify that?

Secondly, the logic here is confusing: you first copy the decoded
modrm byte, then modify one bit of the decoded vex prefix, then modify
the copied modrm byte, then (below) copy the (potentially) modified
vex prefix.

It seems like it would be a lot more clear to have the if before the
init_prefixes, and have it look like this:

if ( ea.type == OP_MEM )
{
  /* Convert blah */
  vex.b = 1;
  modrm &= 0x38;
}

(Or lvex and lmodrm, which are local variables containing copies of
the decoded vex prefix and modrm byte.)

> +        opc[2] = imm1;
> +        fic.insn_bytes = PFX_BYTES + 3;
> +        opc[3] = 0xc3;

Open-coding this 'ret' makes this harder to parse.

> +
> +        copy_VEX(opc, vex);

If we move the vex & modrm adjustment above the init_prefixes(), then
this can go right after init_prefixes(); that way the bytes are all
basically set in order.

At very least this logically goes with the other "set up opc" code, so
if left here the blank line should be after this rather than before
this.

More later. :-)

 -George

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.