[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging] x86emul: support AVX512-BMM



commit ea3d4312f7f34451a360a1a65162f220e329aeb3
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Thu Apr 9 08:25:25 2026 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Apr 9 08:25:25 2026 +0200

    x86emul: support AVX512-BMM
    
    EVEX.W meaning is unusual for VBMAC{,X}OR16x16x16, but that needs taking
    care of only in the test harness.
    
    Like already proposed in "x86emul: support AVX10.1", use just
    vcpu_must_have(), not host_and_vcpu_must_have().
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 tools/tests/x86_emulator/evex-disp8.c       | 17 +++++++++++++++++
 tools/tests/x86_emulator/predicates.c       |  3 +++
 tools/tests/x86_emulator/x86-emulate.h      |  2 ++
 xen/arch/x86/x86_emulate/decode.c           |  1 +
 xen/arch/x86/x86_emulate/private.h          |  1 +
 xen/arch/x86/x86_emulate/x86_emulate.c      | 13 +++++++++++++
 xen/include/public/arch-x86/cpufeatureset.h |  1 +
 xen/tools/gen-cpuid.py                      |  2 +-
 8 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/tools/tests/x86_emulator/evex-disp8.c 
b/tools/tests/x86_emulator/evex-disp8.c
index efb6c70637..3bd5944dd8 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -689,6 +689,15 @@ static const struct test avx512_fp16_128[] = {
     INSN(movw, 66, map5, 7e, el, fp16, el),
 };
 
+static const struct test avx512_bmm_all[] = {
+    INSN(bitrev,   , map6, 81, vl, b, vl),
+};
+
+static const struct test avx512_bmm_no128[] = {
+    INSN(bmacor16x16x16,    , map6, 81, vl, w, vl),
+    INSN(bmacxor16x16x16,   , map6, 81, vl, w, vl),
+};
+
 static const struct test gfni_all[] = {
     INSN(gf2p8affineinvqb, 66, 0f3a, cf, vl, q, vl),
     INSN(gf2p8affineqb,    66, 0f3a, ce, vl, q, vl),
@@ -817,6 +826,12 @@ static void test_one(const struct test *test, enum vl vl,
 
     case ESZ_w:
         evex.w = 1;
+        /*
+         * VBMAC{,X}OR16x16x16 don't follow the general pattern: EVEX.W 
controls
+         * reduction kind there, not element size.
+         */
+        if ( test->spc == SPC_map6 && !test->pfx && test->opc == 0x80 )
+            evex.w = test->mnemonic[4] == 'x';
         /* fall through */
     case ESZ_fp16:
         esz = 2;
@@ -1087,6 +1102,8 @@ void evex_disp8_test(void *instr, struct x86_emulate_ctxt 
*ctxt,
     RUN(avx512_vpopcntdq, all);
     RUN(avx512_fp16, all);
     RUN(avx512_fp16, 128);
+    RUN(avx512_bmm, all);
+    RUN(avx512_bmm, no128);
 
     if ( cpu_has_avx512f )
     {
diff --git a/tools/tests/x86_emulator/predicates.c 
b/tools/tests/x86_emulator/predicates.c
index afe2e297d9..019c2ced8a 100644
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2140,6 +2140,9 @@ static const struct evex {
     { { 0x56 }, 2, T, R, pfx_f2, W0, Ln }, /* vfcmaddcph */
     { { 0x57 }, 2, T, R, pfx_f3, W0, LIG }, /* vfmaddcsh */
     { { 0x57 }, 2, T, R, pfx_f2, W0, LIG }, /* vfcmaddcsh */
+    { { 0x80 }, 2, T, R, pfx_no, W0, L1 | L2 }, /* vbmacor16x16x16 */
+    { { 0x80 }, 2, T, R, pfx_no, W1, L1 | L2 }, /* vbmacxor16x16x16 */
+    { { 0x81 }, 2, T, R, pfx_no, W0, Ln }, /* vbitrev */
     { { 0x96 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub132ph */
     { { 0x97 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd132ph */
     { { 0x98 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd132ph */
diff --git a/tools/tests/x86_emulator/x86-emulate.h 
b/tools/tests/x86_emulator/x86-emulate.h
index 4c292ac338..62ebd881c9 100644
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -223,6 +223,8 @@ void wrpkru(unsigned int val);
 #define cpu_has_xop                 (cpu_policy.extd.xop  && xcr0_mask(6))
 #define cpu_has_fma4                (cpu_policy.extd.fma4 && xcr0_mask(6))
 #define cpu_has_tbm                  cpu_policy.extd.tbm
+#define cpu_has_avx512_bmm          (cpu_policy.extd.avx512_bmm && \
+                                     xcr0_mask(0xe6))
 
 int emul_test_cpuid(
     uint32_t leaf,
diff --git a/xen/arch/x86/x86_emulate/decode.c 
b/xen/arch/x86/x86_emulate/decode.c
index 7ce97c4726..2fe77c4d8d 100644
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -387,6 +387,7 @@ static const struct ext0f38_table {
     [0x7a ... 0x7c] = { .simd_size = simd_none, .two_op = 1 },
     [0x7d ... 0x7e] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x7f] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x80 ... 0x81] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x82] = { .simd_size = simd_other },
     [0x83] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x88] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_dq },
diff --git a/xen/arch/x86/x86_emulate/private.h 
b/xen/arch/x86/x86_emulate/private.h
index 24c79c4e8f..dfdc263f9c 100644
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -566,6 +566,7 @@ amd_like(const struct x86_emulate_ctxt *ctxt)
 #define vcpu_has_clzero()      (ctxt->cpuid->extd.clzero)
 #define vcpu_has_wbnoinvd()    (ctxt->cpuid->extd.wbnoinvd)
 #define vcpu_has_nscb()        (ctxt->cpuid->extd.nscb)
+#define vcpu_has_avx512_bmm()  (ctxt->cpuid->extd.avx512_bmm)
 
 #define vcpu_has_bmi1()        (ctxt->cpuid->feat.bmi1)
 #define vcpu_has_hle()         (ctxt->cpuid->feat.hle)
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c 
b/xen/arch/x86/x86_emulate/x86_emulate.c
index 7751a67130..11d145e177 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -7859,6 +7859,19 @@ x86_emulate(
         goto simd_zmm;
     }
 
+    case X86EMUL_OPC_EVEX(6, 0x80): /* vbmac{,x}or16x16x16 
[xyz]mm/mem,[xyz]mm,[xyz]mm */
+        vcpu_must_have(avx512_bmm);
+        generate_exception_if(!evex.lr || evex.brs || evex.opmsk, X86_EXC_UD);
+        avx512_vlen_check(false);
+        goto simd_zmm;
+
+    case X86EMUL_OPC_EVEX(6, 0x81): /* vbitrev [xyz]mm/mem,[xyz]mm */
+        vcpu_must_have(avx512_bmm);
+        generate_exception_if(evex.w || evex.brs || evex.reg != 0xf || 
!evex.RX,
+                              X86_EXC_UD);
+        avx512_vlen_check(false);
+        goto simd_zmm;
+
     case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */
diff --git a/xen/include/public/arch-x86/cpufeatureset.h 
b/xen/include/public/arch-x86/cpufeatureset.h
index 438acba4e2..ce4a7982a5 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -333,6 +333,7 @@ XEN_CPUFEATURE(CPUID_USER_DIS,     11*32+17) /*   CPUID 
disable for CPL > 0 soft
 XEN_CPUFEATURE(EPSF,               11*32+18) /*A  Enhanced Predictive Store 
Forwarding */
 XEN_CPUFEATURE(FSRSC,              11*32+19) /*A  Fast Short REP SCASB */
 XEN_CPUFEATURE(AMD_PREFETCHI,      11*32+20) /*A  PREFETCHIT{0,1} Instructions 
*/
+XEN_CPUFEATURE(AVX512_BMM,         11*32+23) /*a  AVX512 Bitmap Manipulation 
Instructions */
 XEN_CPUFEATURE(SBPB,               11*32+27) /*A  Selective Branch Predictor 
Barrier */
 XEN_CPUFEATURE(IBPB_BRTYPE,        11*32+28) /*A  IBPB flushes Branch Type 
predictions too */
 XEN_CPUFEATURE(SRSO_NO,            11*32+29) /*A  Hardware not vulnerable to 
Speculative Return Stack Overflow */
diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
index 13d85a4348..b751dbab27 100755
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -307,7 +307,7 @@ def crunch_numbers(state):
         # dependents of AVX512BW (as to requiring wider than 16-bit mask
         # registers), despite the SDM not formally making this connection.
         AVX512BW: [AVX512_VBMI, AVX512_VBMI2, AVX512_BITALG, AVX512_BF16,
-                   AVX512_FP16],
+                   AVX512_FP16, AVX512_BMM],
 
         # Extensions with VEX/EVEX encodings keyed to a separate feature
         # flag are made dependents of their respective legacy feature.
--
generated by git-patchbot for /home/xen/git/xen.git#staging



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.