|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen master] x86emul: support AVX512-BMM
commit ea3d4312f7f34451a360a1a65162f220e329aeb3
Author: Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Thu Apr 9 08:25:25 2026 +0200
Commit: Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Apr 9 08:25:25 2026 +0200
x86emul: support AVX512-BMM
EVEX.W meaning is unusual for VBMAC{,X}OR16x16x16, but that needs taking
care of only in the test harness.
Like already proposed in "x86emul: support AVX10.1", use just
vcpu_must_have(), not host_and_vcpu_must_have().
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
tools/tests/x86_emulator/evex-disp8.c | 17 +++++++++++++++++
tools/tests/x86_emulator/predicates.c | 3 +++
tools/tests/x86_emulator/x86-emulate.h | 2 ++
xen/arch/x86/x86_emulate/decode.c | 1 +
xen/arch/x86/x86_emulate/private.h | 1 +
xen/arch/x86/x86_emulate/x86_emulate.c | 13 +++++++++++++
xen/include/public/arch-x86/cpufeatureset.h | 1 +
xen/tools/gen-cpuid.py | 2 +-
8 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/tools/tests/x86_emulator/evex-disp8.c
b/tools/tests/x86_emulator/evex-disp8.c
index efb6c70637..3bd5944dd8 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -689,6 +689,15 @@ static const struct test avx512_fp16_128[] = {
INSN(movw, 66, map5, 7e, el, fp16, el),
};
+static const struct test avx512_bmm_all[] = {
+ INSN(bitrev, , map6, 81, vl, b, vl),
+};
+
+static const struct test avx512_bmm_no128[] = {
+ INSN(bmacor16x16x16, , map6, 80, vl, w, vl), /* EVEX.W0 */
+ INSN(bmacxor16x16x16, , map6, 80, vl, w, vl), /* EVEX.W1 */
+};
+
static const struct test gfni_all[] = {
INSN(gf2p8affineinvqb, 66, 0f3a, cf, vl, q, vl),
INSN(gf2p8affineqb, 66, 0f3a, ce, vl, q, vl),
@@ -817,6 +826,12 @@ static void test_one(const struct test *test, enum vl vl,
case ESZ_w:
evex.w = 1;
+ /*
+ * VBMAC{,X}OR16x16x16 don't follow the general pattern: EVEX.W controls
+ * reduction kind there, not element size.
+ */
+ if ( test->spc == SPC_map6 && !test->pfx && test->opc == 0x80 )
+ evex.w = test->mnemonic[4] == 'x';
/* fall through */
case ESZ_fp16:
esz = 2;
@@ -1087,6 +1102,8 @@ void evex_disp8_test(void *instr, struct x86_emulate_ctxt
*ctxt,
RUN(avx512_vpopcntdq, all);
RUN(avx512_fp16, all);
RUN(avx512_fp16, 128);
+ RUN(avx512_bmm, all);
+ RUN(avx512_bmm, no128);
if ( cpu_has_avx512f )
{
diff --git a/tools/tests/x86_emulator/predicates.c
b/tools/tests/x86_emulator/predicates.c
index afe2e297d9..019c2ced8a 100644
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2140,6 +2140,9 @@ static const struct evex {
{ { 0x56 }, 2, T, R, pfx_f2, W0, Ln }, /* vfcmaddcph */
{ { 0x57 }, 2, T, R, pfx_f3, W0, LIG }, /* vfmaddcsh */
{ { 0x57 }, 2, T, R, pfx_f2, W0, LIG }, /* vfcmaddcsh */
+ { { 0x80 }, 2, T, R, pfx_no, W0, L1 | L2 }, /* vbmacor16x16x16 */
+ { { 0x80 }, 2, T, R, pfx_no, W1, L1 | L2 }, /* vbmacxor16x16x16 */
+ { { 0x81 }, 2, T, R, pfx_no, W0, Ln }, /* vbitrev */
{ { 0x96 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub132ph */
{ { 0x97 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd132ph */
{ { 0x98 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd132ph */
diff --git a/tools/tests/x86_emulator/x86-emulate.h
b/tools/tests/x86_emulator/x86-emulate.h
index 4c292ac338..62ebd881c9 100644
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -223,6 +223,8 @@ void wrpkru(unsigned int val);
#define cpu_has_xop (cpu_policy.extd.xop && xcr0_mask(6))
#define cpu_has_fma4 (cpu_policy.extd.fma4 && xcr0_mask(6))
#define cpu_has_tbm cpu_policy.extd.tbm
+#define cpu_has_avx512_bmm (cpu_policy.extd.avx512_bmm && \
+ xcr0_mask(0xe6))
int emul_test_cpuid(
uint32_t leaf,
diff --git a/xen/arch/x86/x86_emulate/decode.c
b/xen/arch/x86/x86_emulate/decode.c
index 7ce97c4726..2fe77c4d8d 100644
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -387,6 +387,7 @@ static const struct ext0f38_table {
[0x7a ... 0x7c] = { .simd_size = simd_none, .two_op = 1 },
[0x7d ... 0x7e] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x7f] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+ [0x80 ... 0x81] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x82] = { .simd_size = simd_other },
[0x83] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x88] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_dq },
diff --git a/xen/arch/x86/x86_emulate/private.h
b/xen/arch/x86/x86_emulate/private.h
index 24c79c4e8f..dfdc263f9c 100644
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -566,6 +566,7 @@ amd_like(const struct x86_emulate_ctxt *ctxt)
#define vcpu_has_clzero() (ctxt->cpuid->extd.clzero)
#define vcpu_has_wbnoinvd() (ctxt->cpuid->extd.wbnoinvd)
#define vcpu_has_nscb() (ctxt->cpuid->extd.nscb)
+#define vcpu_has_avx512_bmm() (ctxt->cpuid->extd.avx512_bmm)
#define vcpu_has_bmi1() (ctxt->cpuid->feat.bmi1)
#define vcpu_has_hle() (ctxt->cpuid->feat.hle)
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c
b/xen/arch/x86/x86_emulate/x86_emulate.c
index 7751a67130..11d145e177 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -7859,6 +7859,19 @@ x86_emulate(
goto simd_zmm;
}
+ case X86EMUL_OPC_EVEX(6, 0x80): /* vbmac{,x}or16x16x16
[xyz]mm/mem,[xyz]mm,[xyz]mm */
+ vcpu_must_have(avx512_bmm);
+ generate_exception_if(!evex.lr || evex.brs || evex.opmsk, X86_EXC_UD);
+ avx512_vlen_check(false);
+ goto simd_zmm;
+
+ case X86EMUL_OPC_EVEX(6, 0x81): /* vbitrev [xyz]mm/mem,[xyz]mm */
+ vcpu_must_have(avx512_bmm);
+ generate_exception_if(evex.w || evex.brs || evex.reg != 0xf ||
!evex.RX,
+ X86_EXC_UD);
+ avx512_vlen_check(false);
+ goto simd_zmm;
+
case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */
case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */
case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */
diff --git a/xen/include/public/arch-x86/cpufeatureset.h
b/xen/include/public/arch-x86/cpufeatureset.h
index 438acba4e2..ce4a7982a5 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -333,6 +333,7 @@ XEN_CPUFEATURE(CPUID_USER_DIS, 11*32+17) /* CPUID
disable for CPL > 0 soft
XEN_CPUFEATURE(EPSF, 11*32+18) /*A Enhanced Predictive Store
Forwarding */
XEN_CPUFEATURE(FSRSC, 11*32+19) /*A Fast Short REP SCASB */
XEN_CPUFEATURE(AMD_PREFETCHI, 11*32+20) /*A PREFETCHIT{0,1} Instructions
*/
+XEN_CPUFEATURE(AVX512_BMM, 11*32+23) /*a AVX512 Bitmap Manipulation
Instructions */
XEN_CPUFEATURE(SBPB, 11*32+27) /*A Selective Branch Predictor
Barrier */
XEN_CPUFEATURE(IBPB_BRTYPE, 11*32+28) /*A IBPB flushes Branch Type
predictions too */
XEN_CPUFEATURE(SRSO_NO, 11*32+29) /*A Hardware not vulnerable to
Speculative Return Stack Overflow */
diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
index 13d85a4348..b751dbab27 100755
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -307,7 +307,7 @@ def crunch_numbers(state):
# dependents of AVX512BW (as to requiring wider than 16-bit mask
# registers), despite the SDM not formally making this connection.
AVX512BW: [AVX512_VBMI, AVX512_VBMI2, AVX512_BITALG, AVX512_BF16,
- AVX512_FP16],
+ AVX512_FP16, AVX512_BMM],
# Extensions with VEX/EVEX encodings keyed to a separate feature
# flag are made dependents of their respective legacy feature.
--
generated by git-patchbot for /home/xen/git/xen.git#master
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |