[Xen-devel] [PATCH v3 18/34] x86emul: support AVX512{F,BW} packed integer arithmetic insns
Note: vpadd* / vpsub* et al. are put at a seemingly wrong slot of the
big switch(). This is done in anticipation of adding vpunpck* to those
groups (the nearby legacy/VEX-encoded case labels support this placement).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v3: New.
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -146,6 +146,8 @@ static const struct test avx512f_all[] =
INSN_PFP_NB(movu, 0f, 10),
INSN_PFP_NB(movu, 0f, 11),
INSN_FP(mul, 0f, 59),
+ INSN(paddd, 66, 0f, fe, vl, d, vl),
+ INSN(paddq, 66, 0f, d4, vl, q, vl),
INSN(pand, 66, 0f, db, vl, dq, vl),
INSN(pandn, 66, 0f, df, vl, dq, vl),
INSN(pcmp, 66, 0f3a, 1f, vl, dq, vl),
@@ -154,7 +156,16 @@ static const struct test avx512f_all[] =
INSN(pcmpgtd, 66, 0f, 66, vl, d, vl),
INSN(pcmpgtq, 66, 0f38, 37, vl, q, vl),
INSN(pcmpu, 66, 0f3a, 1e, vl, dq, vl),
+ INSN(pmaxs, 66, 0f38, 3d, vl, dq, vl),
+ INSN(pmaxu, 66, 0f38, 3f, vl, dq, vl),
+ INSN(pmins, 66, 0f38, 39, vl, dq, vl),
+ INSN(pminu, 66, 0f38, 3b, vl, dq, vl),
+ INSN(pmuldq, 66, 0f38, 28, vl, q, vl),
+ INSN(pmulld, 66, 0f38, 40, vl, d, vl),
+ INSN(pmuludq, 66, 0f, f4, vl, q, vl),
INSN(por, 66, 0f, eb, vl, dq, vl),
+ INSN(psubd, 66, 0f, fa, vl, d, vl),
+ INSN(psubq, 66, 0f, fb, vl, q, vl),
INSN(pternlog, 66, 0f3a, 25, vl, dq, vl),
INSN(ptestm, 66, 0f38, 27, vl, dq, vl),
INSN(ptestnm, f3, 0f38, 27, vl, dq, vl),
@@ -189,12 +200,39 @@ static const struct test avx512bw_all[]
INSN(movdqu8, f2, 0f, 7f, vl, b, vl),
INSN(movdqu16, f2, 0f, 6f, vl, w, vl),
INSN(movdqu16, f2, 0f, 7f, vl, w, vl),
+ INSN(paddb, 66, 0f, fc, vl, b, vl),
+ INSN(paddsb, 66, 0f, ec, vl, b, vl),
+ INSN(paddsw, 66, 0f, ed, vl, w, vl),
+ INSN(paddusb, 66, 0f, dc, vl, b, vl),
+ INSN(paddusw, 66, 0f, dd, vl, w, vl),
+ INSN(paddw, 66, 0f, fd, vl, w, vl),
+ INSN(pavgb, 66, 0f, e0, vl, b, vl),
+ INSN(pavgw, 66, 0f, e3, vl, w, vl),
INSN(pcmp, 66, 0f3a, 3f, vl, bw, vl),
INSN(pcmpeqb, 66, 0f, 74, vl, b, vl),
INSN(pcmpeqw, 66, 0f, 75, vl, w, vl),
INSN(pcmpgtb, 66, 0f, 64, vl, b, vl),
INSN(pcmpgtw, 66, 0f, 65, vl, w, vl),
INSN(pcmpu, 66, 0f3a, 3e, vl, bw, vl),
+ INSN(pmaddwd, 66, 0f, f5, vl, w, vl),
+ INSN(pmaxsb, 66, 0f38, 3c, vl, b, vl),
+ INSN(pmaxsw, 66, 0f, ee, vl, w, vl),
+ INSN(pmaxub, 66, 0f, de, vl, b, vl),
+ INSN(pmaxuw, 66, 0f38, 3e, vl, w, vl),
+ INSN(pminsb, 66, 0f38, 38, vl, b, vl),
+ INSN(pminsw, 66, 0f, ea, vl, w, vl),
+ INSN(pminub, 66, 0f, da, vl, b, vl),
+ INSN(pminuw, 66, 0f38, 3a, vl, w, vl),
+ INSN(pmulhuw, 66, 0f, e4, vl, w, vl),
+ INSN(pmulhw, 66, 0f, e5, vl, w, vl),
+ INSN(pmullw, 66, 0f, d5, vl, w, vl),
+ INSN(psadbw, 66, 0f, f6, vl, b, vl),
+ INSN(psubb, 66, 0f, f8, vl, b, vl),
+ INSN(psubsb, 66, 0f, e8, vl, b, vl),
+ INSN(psubsw, 66, 0f, e9, vl, w, vl),
+ INSN(psubusb, 66, 0f, d8, vl, b, vl),
+ INSN(psubusw, 66, 0f, d9, vl, w, vl),
+ INSN(psubw, 66, 0f, f9, vl, w, vl),
INSN(ptestm, 66, 0f38, 26, vl, bw, vl),
INSN(ptestnm, f3, 0f38, 26, vl, bw, vl),
};
@@ -203,6 +241,7 @@ static const struct test avx512dq_all[]
INSN_PFP(and, 0f, 54),
INSN_PFP(andn, 0f, 55),
INSN_PFP(or, 0f, 56),
+ INSN(pmullq, 66, 0f38, 40, vl, q, vl),
INSN_PFP(xor, 0f, 57),
};
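
(Purely illustrative aside, not part of the patch: a test-table row such as
INSN(paddd, 66, 0f, fe, vl, d, vl) names the mnemonic, the mandatory 0x66
prefix, the 0f opcode map, the 0xfe opcode byte, a full-vector-length memory
operand, dword elements, and full-vector Disp8 scaling. A rough sketch of
what such a row conveys -- struct and field names are invented here and are
not the harness's real types:)

struct insn_row {
    const char   *mnemonic;   /* "paddd"                                 */
    unsigned char pfx;        /* 0x66: mandatory SIMD prefix             */
    unsigned int  map;        /* 0x0f: opcode map                        */
    unsigned char opc;        /* 0xfe: opcode byte                       */
    unsigned char op_width;   /* "vl": operand spans the full vector     */
    unsigned char elem_width; /* "d": 32-bit elements                    */
    unsigned char d8_scale;   /* "vl": Disp8 compressed by vector length */
};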
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -365,21 +365,21 @@ static const struct twobyte_table {
[0xc8 ... 0xcf] = { ImplicitOps },
[0xd0] = { DstImplicit|SrcMem|ModRM, simd_other },
[0xd1 ... 0xd3] = { DstImplicit|SrcMem|ModRM, simd_other },
- [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+ [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xd6] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 },
[0xd7] = { DstReg|SrcImplicit|ModRM|Mov },
[0xd8 ... 0xdf] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
- [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+ [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_other },
- [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+ [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
[0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
[0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
[0xf1 ... 0xf3] = { DstImplicit|SrcMem|ModRM, simd_other },
- [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+ [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xf7] = { DstMem|SrcMem|ModRM|Mov, simd_packed_int },
- [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+ [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xff] = { ModRM }
};
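
(Illustrative aside, not part of the patch: the d8s_vl additions above tag
these opcode table entries for EVEX "full vector" Disp8 compression, i.e.
for a non-broadcast memory operand the 8-bit displacement is scaled by the
vector length, so even 64-byte zmm accesses keep a one-byte displacement
field. A minimal sketch of that scaling, with invented names:)

#include <stdint.h>

static int32_t scaled_disp8(int8_t disp8, unsigned int evex_lr)
{
    /* EVEX.L'L selects the vector length: 16 (xmm), 32 (ymm), 64 (zmm). */
    unsigned int vl_bytes = 16u << evex_lr;

    return (int32_t)disp8 * (int32_t)vl_bytes;
}

(With embedded broadcast, i.e. EVEX.b set on a memory operand, the scale is
the element size instead.)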
@@ -449,7 +449,7 @@ static const struct ext0f38_table {
[0x2e ... 0x2f] = { .simd_size = simd_packed_fp, .to_mem = 1 },
[0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
[0x36 ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
- [0x40] = { .simd_size = simd_packed_int },
+ [0x40] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
[0x45 ... 0x47] = { .simd_size = simd_packed_int },
[0x58 ... 0x59] = { .simd_size = simd_other, .two_op = 1 },
@@ -5960,6 +5960,10 @@ x86_emulate(
case X86EMUL_OPC_EVEX_66(0x0f, 0xdf): /* vpandn{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(0x0f, 0xeb): /* vpor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(0x0f, 0xef): /* vpxor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x39): /* vpmins{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3b): /* vpminu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3d): /* vpmaxs{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3f): /* vpmaxu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
avx512f_no_sae:
host_and_vcpu_must_have(avx512f);
generate_exception_if(ea.type != OP_MEM && evex.br, EXC_UD);
@@ -6560,6 +6564,37 @@ x86_emulate(
get_fpu(X86EMUL_FPU_mmx);
goto simd_0f_common;
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xf5): /* vpmaddwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xf6): /* vpsadbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ fault_suppression = false;
+ /* fall through */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xd5): /* vpmullw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xd8): /* vpsubusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xd9): /* vpsubusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xdc): /* vpaddusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xdd): /* vpaddusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe0): /* vpavgb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe3): /* vpavgw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe5): /* vpmulhw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe8): /* vpsubsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe9): /* vpsubsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xec): /* vpaddsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xed): /* vpaddsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xf8): /* vpsubb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xf9): /* vpsubw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xfc): /* vpaddb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xfd): /* vpaddw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512bw);
+ generate_exception_if(evex.br, EXC_UD);
+ elem_bytes = 1 << (b & 1);
+ goto avx512f_no_sae;
+
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xfa): /* vpsubd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xfb): /* vpsubq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xfe): /* vpaddd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ generate_exception_if(evex.w != (b & 1), EXC_UD);
+ goto avx512f_no_sae;
+
case X86EMUL_OPC_EVEX_F3(0x0f38, 0x26): /* vptestnm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */
case X86EMUL_OPC_EVEX_F3(0x0f38, 0x27): /* vptestnm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */
op_bytes = 16 << evex.lr;
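
(Illustrative aside, not part of the patch: the AVX512BW group above derives
the element size directly from the opcode byte -- the legacy byte/word opcode
pairs differ only in bit 0, hence elem_bytes = 1 << (b & 1); and for the
vpsubd/vpsubq/vpaddd group the same bit has to agree with EVEX.W, hence the
evex.w != (b & 1) check. A standalone sketch of the bit-0 trick:)

#include <assert.h>

static unsigned int bw_elem_bytes(unsigned char opc)
{
    return 1u << (opc & 1);           /* even opcode -> 1 byte, odd -> 2 */
}

int main(void)
{
    assert(bw_elem_bytes(0xfc) == 1); /* vpaddb  */
    assert(bw_elem_bytes(0xfd) == 2); /* vpaddw  */
    assert(bw_elem_bytes(0xf8) == 1); /* vpsubb  */
    assert(bw_elem_bytes(0xe5) == 2); /* vpmulhw */
    return 0;
}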
@@ -6586,6 +6621,12 @@ x86_emulate(
avx512_vlen_check(false);
goto simd_zmm;
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xd4): /* vpaddq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xf4): /* vpmuludq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x28): /* vpmuldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ generate_exception_if(!evex.w, EXC_UD);
+ goto avx512f_no_sae;
+
CASE_SIMD_PACKED_INT(0x0f, 0x6e): /* mov{d,q} r/m,{,x}mm */
case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
CASE_SIMD_PACKED_INT(0x0f, 0x7e): /* mov{d,q} {,x}mm,r/m */
@@ -7837,6 +7878,16 @@ x86_emulate(
vcpu_must_have(mmxext);
goto simd_0f_mmx;
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xda): /* vpminub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xde): /* vpmaxub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe4): /* vpmulhuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xea): /* vpminsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xee): /* vpmaxsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512bw);
+ generate_exception_if(evex.br, EXC_UD);
+ elem_bytes = b & 0x10 ? 1 : 2;
+ goto avx512f_no_sae;
+
case X86EMUL_OPC_66(0x0f, 0xe6): /* cvttpd2dq xmm/mem,xmm */
case X86EMUL_OPC_VEX_66(0x0f, 0xe6): /* vcvttpd2dq {x,y}mm/mem,xmm */
case X86EMUL_OPC_F3(0x0f, 0xe6): /* cvtdq2pd xmm/mem,xmm */
@@ -8210,6 +8261,20 @@ x86_emulate(
host_and_vcpu_must_have(sse4_2);
goto simd_0f38_common;
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x38): /* vpminsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3a): /* vpminuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3c): /* vpmaxsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3e): /* vpmaxuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512bw);
+ generate_exception_if(evex.br, EXC_UD);
+ elem_bytes = b & 2 ?: 1;
+ goto avx512f_no_sae;
+
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x40): /* vpmull{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ if ( evex.w )
+ host_and_vcpu_must_have(avx512dq);
+ goto avx512f_no_sae;
+
case X86EMUL_OPC_66(0x0f38, 0xdb): /* aesimc xmm/m128,xmm */
case X86EMUL_OPC_VEX_66(0x0f38, 0xdb): /* vaesimc xmm/m128,xmm */
case X86EMUL_OPC_66(0x0f38, 0xdc): /* aesenc xmm/m128,xmm,xmm */
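
(Illustrative aside, not part of the patch: two further element-size
derivations appear above. In the 0f map group 0xda...0xee, opcode bit 4
separates the byte forms from the word forms, hence b & 0x10 ? 1 : 2. In the
0f38 map group 0x38...0x3e, b & 2 ?: 1 uses the GNU "Elvis" extension x ?: y,
i.e. x ? x : y, so bit 1 yields 2 for the word forms and falls back to 1 for
the byte forms. A small check of the latter, GNU C only, matching the patch's
use of the extension:)

#include <assert.h>

static unsigned int elem_0f38(unsigned char opc)
{
    return opc & 2 ?: 1;              /* (opc & 2) ? (opc & 2) : 1 */
}

int main(void)
{
    assert(elem_0f38(0x38) == 1);     /* vpminsb */
    assert(elem_0f38(0x3a) == 2);     /* vpminuw */
    assert(elem_0f38(0x3c) == 1);     /* vpmaxsb */
    assert(elem_0f38(0x3e) == 2);     /* vpmaxuw */
    return 0;
}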