[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v3 13/16] x86emul: support AVX10.2 saturating convert insns
While the to-byte ones are somewhat different from what has been there (yet then nicely regular from an operands perspective), the others are pretty similar to various existing insns. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- Spec rev 002 says VCVTTNEBF162I{,U}BS, yet that's going to change to VCVTTBF162I{,U}BS. --- SDE: ??? --- v3: New. --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -719,6 +719,30 @@ static const struct test avx10_2_all[] = INSN(comxsd, f3, 0f, 2f, el, q, el), INSN(comxsh, f2, map5, 2f, el, fp16, el), INSN(comxss, f2, 0f, 2f, el, d, el), + INSN(cvtnebf162ibs, f2, map5, 69, vl, bf16, vl), + INSN(cvtnebf162iubs, f2, map5, 6b, vl, bf16, vl), + INSN(cvtph2ibs, , map5, 69, vl, fp16, vl), + INSN(cvtph2iubs, , map5, 6b, vl, fp16, vl), + INSN(cvtps2ibs, 66, map5, 69, vl, d, vl), + INSN(cvtps2iubs, 66, map5, 6b, vl, d, vl), + INSN(cvttbf162ibs, f2, map5, 68, vl, bf16, vl), + INSN(cvttbf162iubs, f2, map5, 6a, vl, bf16, vl), + INSN(cvttpd2dqs, , map5, 6d, vl, q, vl), + INSN(cvttpd2qqs, 66, map5, 6d, vl, q, vl), + INSN(cvttpd2udqs, , map5, 6c, vl, q, vl), + INSN(cvttpd2uqqs, 66, map5, 6c, vl, q, vl), + INSN(cvttph2ibs, , map5, 68, vl, fp16, vl), + INSN(cvttph2iubs, , map5, 6a, vl, fp16, vl), + INSN(cvttps2dqs, , map5, 6d, vl, d, vl), + INSN(cvttps2ibs, 66, map5, 68, vl, d, vl), + INSN(cvttps2iubs, 66, map5, 6a, vl, d, vl), + INSN(cvttps2qqs, 66, map5, 6d, vl_2, d, vl), + INSN(cvttps2udqs, , map5, 6c, vl, d, vl), + INSN(cvttps2uqqs, 66, map5, 6c, vl_2, d, vl), + INSN(cvttsd2sis, f2, map5, 6d, el, q, el), + INSN(cvttsd2usis, f2, map5, 6c, el, q, el), + INSN(cvttss2sis, f3, map5, 6d, el, d, el), + INSN(cvttss2usis, f3, map5, 6c, el, d, el), INSN(divnepbf16, 66, map5, 5e, vl, bf16, vl), INSN(dpphps, , 0f38, 52, vl, d, vl), INSN(fmadd132nepbf16, , map6, 98, vl, bf16, vl), --- a/tools/tests/x86_emulator/predicates.c +++ b/tools/tests/x86_emulator/predicates.c @@ -2162,6 +2162,26 @@ static const struct evex { { { 
0x5f }, 2, T, R, pfx_no, W0, Ln }, /* vmaxph */ { { 0x5f }, 2, T, R, pfx_66, W0, Ln }, /* vmaxpbf16 */ { { 0x5f }, 2, T, R, pfx_f3, W0, LIG }, /* vmaxsh */ + { { 0x68 }, 2, T, R, pfx_no, W0, Ln }, /* vcvttph2ibs */ + { { 0x68 }, 2, T, R, pfx_66, W0, Ln }, /* vcvttps2ibs */ + { { 0x68 }, 2, T, R, pfx_f2, W0, Ln }, /* vcvttbf162ibs */ + { { 0x69 }, 2, T, R, pfx_no, W0, Ln }, /* vcvtph2ibs */ + { { 0x69 }, 2, T, R, pfx_66, W0, Ln }, /* vcvtps2ibs */ + { { 0x69 }, 2, T, R, pfx_f2, W0, Ln }, /* vcvtnebf162ibs */ + { { 0x6a }, 2, T, R, pfx_no, W0, Ln }, /* vcvttph2iubs */ + { { 0x6a }, 2, T, R, pfx_66, W0, Ln }, /* vcvttps2iubs */ + { { 0x6a }, 2, T, R, pfx_f2, W0, Ln }, /* vcvttbf162iubs */ + { { 0x6b }, 2, T, R, pfx_no, W0, Ln }, /* vcvtph2iubs */ + { { 0x6b }, 2, T, R, pfx_66, W0, Ln }, /* vcvtps2iubs */ + { { 0x6b }, 2, T, R, pfx_f2, W0, Ln }, /* vcvtnebf162iubs */ + { { 0x6c }, 2, T, R, pfx_no, Wn, Ln }, /* vcvttp{s,d}2udqs */ + { { 0x6c }, 2, T, R, pfx_66, Wn, Ln }, /* vcvttp{s,d}2uqqs */ + { { 0x6c }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttss2usis */ + { { 0x6c }, 2, T, R, pfx_f2, Wn, LIG }, /* vcvttsd2usis */ + { { 0x6d }, 2, T, R, pfx_no, Wn, Ln }, /* vcvttp{s,d}2dqs */ + { { 0x6d }, 2, T, R, pfx_66, Wn, Ln }, /* vcvttp{s,d}2qqs */ + { { 0x6d }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttss2sis */ + { { 0x6d }, 2, T, R, pfx_f2, Wn, LIG }, /* vcvttsd2sis */ { { 0x6e }, 2, T, R, pfx_66, WIG, L0 }, /* vmovw */ { { 0x6e }, 2, T, R, pfx_f3, W0, L0 }, /* vmovw */ { { 0x78 }, 2, T, R, pfx_no, W0, Ln }, /* vcvttph2udq */ --- a/xen/arch/x86/x86_emulate/decode.c +++ b/xen/arch/x86/x86_emulate/decode.c @@ -1547,6 +1547,19 @@ int x86emul_decode(struct x86_emulate_st s->fp16 = true; break; + case 0x68: /* vcvtt{ph,ps,bf16}2ibs */ + case 0x69: /* vcvt{ph,ps,nebf16}2ibs */ + case 0x6a: /* vcvtt{ph,ps,bf16}2iubs */ + case 0x6b: /* vcvt{ph,ps,nebf16}2iubs */ + if ( !s->evex.pfx || s->evex.pfx == vex_f2 ) + s->fp16 = true; + /* fall through */ + case 0x6c: /* vcvttp{s,d}2u{d,q}qs, 
vcvtts{s,d}2usis */ + case 0x6d: /* vcvttp{s,d}2{d,q}qs, vcvtts{s,d}2sis */ + d |= TwoOp; + s->simd_size = simd_other; + break; + case 0x6e: /* vmovw r/x/m16, xmm */ d = (d & ~SrcMask) | SrcMem16; /* fall through */ @@ -1612,6 +1625,14 @@ int x86emul_decode(struct x86_emulate_st --disp8scale; break; + case 0x6c: /* vcvttps2uqqs and vcvtts{s,d}2usis need special casing */ + case 0x6d: /* vcvttps2qqs and vcvtts{s,d}2sis need special casing */ + if ( s->evex.pfx == vex_66 && !s->evex.w && !s->evex.brs ) + --disp8scale; + else if ( s->evex.pfx & VEX_PREFIX_SCALAR_MASK ) + disp8scale = s->evex.pfx & VEX_PREFIX_DOUBLE_MASK ? 3 : 2; + break; + case 0x7a: case 0x7b: /* vcvt{,t}ph2qq need special casing */ if ( s->evex.pfx == vex_66 && !s->evex.brs ) disp8scale = s->evex.brs ? 1 : 2 + s->evex.lr; --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -8025,6 +8025,55 @@ x86_emulate( op_bytes = 8 << evex.lr; goto simd_zmm; + case X86EMUL_OPC_EVEX_F2(5, 0x68): /* vcvttbf162ibs [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_F2(5, 0x69): /* vcvtnebf162ibs [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_F2(5, 0x6a): /* vcvttbf162iubs [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_F2(5, 0x6b): /* vcvtnebf162iubs [xyz]mm/mem,[xyz]mm{k} */ + generate_exception_if(ea.type != OP_MEM && evex.brs, X86_EXC_UD); + /* fall through */ + case X86EMUL_OPC_EVEX (5, 0x68): /* vcvttph2ibs [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(5, 0x68): /* vcvttps2ibs [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX (5, 0x69): /* vcvtph2ibs [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(5, 0x69): /* vcvtps2ibs [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX (5, 0x6a): /* vcvttph2iubs [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(5, 0x6a): /* vcvttps2iubs [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX (5, 0x6b): /* vcvtph2iubs [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX_66(5, 0x6b): /* vcvtps2iubs [xyz]mm/mem,[xyz]mm{k} */ +
generate_exception_if(evex.w, X86_EXC_UD); + vcpu_must_have(avx10, 2); + if ( ea.type != OP_REG || !evex.brs ) + avx512_vlen_check(false); + op_bytes = 16 << evex.lr; + goto simd_zmm; + + case X86EMUL_OPC_EVEX (5, 0x6c): /* vcvttps2udqs [xyz]mm/mem,[xyz]mm{k} */ + /* vcvttpd2udqs [xyz]mm/mem,{x,y}mm{k} */ + case X86EMUL_OPC_EVEX_66(5, 0x6c): /* vcvttps2uqqs {x,y}mm/mem,[xyz]mm{k} */ + /* vcvttpd2uqqs [xyz]mm/mem,[xyz]mm{k} */ + case X86EMUL_OPC_EVEX (5, 0x6d): /* vcvttps2dqs [xyz]mm/mem,[xyz]mm{k} */ + /* vcvttpd2dqs [xyz]mm/mem,{x,y}mm{k} */ + case X86EMUL_OPC_EVEX_66(5, 0x6d): /* vcvttps2qqs {x,y}mm/mem,[xyz]mm{k} */ + /* vcvttpd2qqs [xyz]mm/mem,[xyz]mm{k} */ + vcpu_must_have(avx10, 2); + if ( ea.type != OP_REG || !evex.brs ) + avx512_vlen_check(false); + op_bytes = 8 << ((evex.w || !evex.pfx) + evex.lr); + goto simd_zmm; + + CASE_SIMD_SCALAR_FP(_EVEX, 5, 0x6c): /* vcvtts{s,d}2usis xmm/mem,reg */ + CASE_SIMD_SCALAR_FP(_EVEX, 5, 0x6d): /* vcvtts{s,d}2sis xmm/mem,reg */ + generate_exception_if((evex.reg != 0xf || !evex.RX || !evex.R || + evex.opmsk), + X86_EXC_UD); + vcpu_must_have(avx10, 2); + if ( !evex.brs ) + avx512_vlen_check(true); + else + generate_exception_if(ea.type != OP_REG || !evex.u, X86_EXC_UD); + get_fpu(X86EMUL_FPU_zmm); + opc = init_evex(stub); + goto cvts_2si; + case X86EMUL_OPC_EVEX_66(5, 0x78): /* vcvttph2uqq xmm/mem,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(5, 0x79): /* vcvtph2uqq xmm/mem,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(5, 0x7a): /* vcvttph2qq xmm/mem,[xyz]mm{k} */
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |