[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 03/10] x86emul: handle AVX512-FP16 move insns
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -622,6 +622,8 @@ static const struct test avx512_fp16_all INSN(maxsh, f3, map5, 5f, el, fp16, el), INSN(minph, , map5, 5d, vl, fp16, vl), INSN(minsh, f3, map5, 5d, el, fp16, el), + INSN(movsh, f3, map5, 10, el, fp16, el), + INSN(movsh, f3, map5, 11, el, fp16, el), INSN(mulph, , map5, 59, vl, fp16, vl), INSN(mulsh, f3, map5, 59, el, fp16, el), INSN(reduceph, , 0f3a, 56, vl, fp16, vl), @@ -635,6 +637,11 @@ static const struct test avx512_fp16_all INSN(ucomish, , map5, 2e, el, fp16, el), }; +static const struct test avx512_fp16_128[] = { + INSN(movw, 66, map5, 6e, el, fp16, el), + INSN(movw, 66, map5, 7e, el, fp16, el), +}; + static const struct test gfni_all[] = { INSN(gf2p8affineinvqb, 66, 0f3a, cf, vl, q, vl), INSN(gf2p8affineqb, 66, 0f3a, ce, vl, q, vl), @@ -1039,6 +1046,7 @@ void evex_disp8_test(void *instr, struct RUN(avx512_vp2intersect, all); RUN(avx512_vpopcntdq, all); RUN(avx512_fp16, all); + RUN(avx512_fp16, 128); if ( cpu_has_avx512f ) { --- a/tools/tests/x86_emulator/predicates.c +++ b/tools/tests/x86_emulator/predicates.c @@ -2029,6 +2029,8 @@ static const struct evex { { { 0xce }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineqb */ { { 0xcf }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineinvqb */ }, evex_map5[] = { + { { 0x10 }, 2, T, R, pfx_f3, W0, LIG }, /* vmovsh */ + { { 0x11 }, 2, T, W, pfx_f3, W0, LIG }, /* vmovsh */ { { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomish */ { { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomish */ { { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtph */ @@ -2045,6 +2047,8 @@ static const struct evex { { { 0x5e }, 2, T, R, pfx_f3, W0, LIG }, /* vdivsh */ { { 0x5f }, 2, T, R, pfx_no, W0, Ln }, /* vmaxph */ { { 0x5f }, 2, T, R, pfx_f3, W0, LIG }, /* vmaxsh */ + { { 0x6e }, 2, T, R, pfx_66, WIG, L0 }, /* vmovw */ + { { 0x7e }, 2, T, W, pfx_66, WIG, L0 }, /* vmovw */ }; static const struct { --- 
a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -5140,6 +5140,76 @@ int main(int argc, char **argv) else printf("skipped\n"); + printf("%-40s", "Testing vmovsh 8(%ecx),%xmm5..."); + if ( stack_exec && cpu_has_avx512_fp16 ) + { + decl_insn(vmovsh_from_mem); + decl_insn(vmovw_to_gpr); + + asm volatile ( "vpcmpeqw %%ymm5, %%ymm5, %%ymm5\n\t" + put_insn(vmovsh_from_mem, + /* vmovsh 8(%0), %%xmm5 */ + ".byte 0x62, 0xf5, 0x7e, 0x08\n\t" + ".byte 0x10, 0x69, 0x04") + :: "c" (NULL) ); + + set_insn(vmovsh_from_mem); + res[2] = 0x3c00bc00; + regs.ecx = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || !check_eip(vmovsh_from_mem) ) + goto fail; + asm volatile ( "kmovw %2, %%k1\n\t" + "vmovdqu16 %1, %%zmm4%{%%k1%}%{z%}\n\t" + "vpcmpeqw %%zmm4, %%zmm5, %%k0\n\t" + "kmovw %%k0, %0" + : "=g" (rc) + : "m" (res[2]), "r" (1) ); + if ( rc != 0xffff ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing vmovsh %xmm4,2(%eax){%k3}..."); + memset(res, ~0, 8); + res[2] = 0xbc00ffff; + memset(res + 3, ~0, 8); + regs.eax = (unsigned long)res; + regs.ecx = ~0; + for ( i = 0; i < 2; ++i ) + { + decl_insn(vmovsh_to_mem); + + asm volatile ( "kmovw %1, %%k3\n\t" + put_insn(vmovsh_to_mem, + /* vmovsh %%xmm4, 2(%0)%{%%k3%} */ + ".byte 0x62, 0xf5, 0x7e, 0x0b\n\t" + ".byte 0x11, 0x60, 0x01") + :: "a" (NULL), "r" (i) ); + + set_insn(vmovsh_to_mem); + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || !check_eip(vmovsh_to_mem) || + memcmp(res, res + 3 - i, 8) ) + goto fail; + } + printf("okay\n"); + + printf("%-40s", "Testing vmovw %xmm5,%ecx..."); + asm volatile ( put_insn(vmovw_to_gpr, + /* vmovw %%xmm5, %0 */ + ".byte 0x62, 0xf5, 0x7d, 0x08\n\t" + ".byte 0x7e, 0xe9") + :: "c" (NULL) ); + set_insn(vmovw_to_gpr); + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || !check_eip(vmovw_to_gpr) || + regs.ecx != 0xbc00 ) + goto fail; + printf("okay\n"); + } + else + 
printf("skipped\n"); + printf("%-40s", "Testing invpcid 16(%ecx),%%edx..."); if ( stack_exec ) { --- a/xen/arch/x86/x86_emulate/decode.c +++ b/xen/arch/x86/x86_emulate/decode.c @@ -585,7 +585,7 @@ static unsigned int decode_disp8scale(en break; case d8s_dq64: - return 2 + (s->op_bytes == 8); + return 1 + !s->fp16 + (s->op_bytes == 8); } switch ( s->simd_size ) @@ -1469,6 +1469,15 @@ int x86emul_decode(struct x86_emulate_st s->fp16 = true; s->simd_size = simd_none; break; + + case 0x6e: /* vmovw r/m16, xmm */ + d = (d & ~SrcMask) | SrcMem16; + /* fall through */ + case 0x7e: /* vmovw xmm, r/m16 */ + if ( s->evex.pfx == vex_66 ) + s->fp16 = true; + s->simd_size = simd_none; + break; } /* Like above re-use twobyte_table[] here. */ --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -4390,6 +4390,15 @@ x86_emulate( #ifndef X86EMUL_NO_SIMD + case X86EMUL_OPC_EVEX_66(5, 0x7e): /* vmovw xmm,r/m16 */ + ASSERT(dst.bytes >= 4); + if ( dst.type == OP_MEM ) + dst.bytes = 2; + /* fall through */ + case X86EMUL_OPC_EVEX_66(5, 0x6e): /* vmovw r/m16,xmm */ + host_and_vcpu_must_have(avx512_fp16); + generate_exception_if(evex.w, EXC_UD); + /* fall through */ case X86EMUL_OPC_EVEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */ case X86EMUL_OPC_EVEX_66(0x0f, 0x7e): /* vmov{d,q} xmm,r/m */ generate_exception_if((evex.lr || evex.opmsk || evex.brs || @@ -7745,8 +7754,18 @@ x86_emulate( #ifndef X86EMUL_NO_SIMD + case X86EMUL_OPC_EVEX_F3(5, 0x10): /* vmovsh m16,xmm{k} */ + /* vmovsh xmm,xmm,xmm{k} */ + case X86EMUL_OPC_EVEX_F3(5, 0x11): /* vmovsh xmm,m16{k} */ + /* vmovsh xmm,xmm,xmm{k} */ + generate_exception_if(evex.brs, EXC_UD); + if ( ea.type == OP_MEM ) + d |= TwoOp; + else + { case X86EMUL_OPC_EVEX_F3(5, 0x51): /* vsqrtsh xmm/m16,xmm,xmm{k} */ - d &= ~TwoOp; + d &= ~TwoOp; + } /* fall through */ case X86EMUL_OPC_EVEX(5, 0x51): /* vsqrtph [xyz]mm/mem,[xyz]mm{k} */ CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x58): /* vadd{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |