[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v9 10/10] x86emul: support AVX512-BMM


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Mon, 24 Nov 2025 16:02:05 +0100
  • Autocrypt: addr=jbeulich@xxxxxxxx; keydata= xsDiBFk3nEQRBADAEaSw6zC/EJkiwGPXbWtPxl2xCdSoeepS07jW8UgcHNurfHvUzogEq5xk hu507c3BarVjyWCJOylMNR98Yd8VqD9UfmX0Hb8/BrA+Hl6/DB/eqGptrf4BSRwcZQM32aZK 7Pj2XbGWIUrZrd70x1eAP9QE3P79Y2oLrsCgbZJfEwCgvz9JjGmQqQkRiTVzlZVCJYcyGGsD /0tbFCzD2h20ahe8rC1gbb3K3qk+LpBtvjBu1RY9drYk0NymiGbJWZgab6t1jM7sk2vuf0Py O9Hf9XBmK0uE9IgMaiCpc32XV9oASz6UJebwkX+zF2jG5I1BfnO9g7KlotcA/v5ClMjgo6Gl MDY4HxoSRu3i1cqqSDtVlt+AOVBJBACrZcnHAUSuCXBPy0jOlBhxPqRWv6ND4c9PH1xjQ3NP nxJuMBS8rnNg22uyfAgmBKNLpLgAGVRMZGaGoJObGf72s6TeIqKJo/LtggAS9qAUiuKVnygo 3wjfkS9A3DRO+SpU7JqWdsveeIQyeyEJ/8PTowmSQLakF+3fote9ybzd880fSmFuIEJldWxp Y2ggPGpiZXVsaWNoQHN1c2UuY29tPsJgBBMRAgAgBQJZN5xEAhsDBgsJCAcDAgQVAggDBBYC AwECHgECF4AACgkQoDSui/t3IH4J+wCfQ5jHdEjCRHj23O/5ttg9r9OIruwAn3103WUITZee e7Sbg12UgcQ5lv7SzsFNBFk3nEQQCACCuTjCjFOUdi5Nm244F+78kLghRcin/awv+IrTcIWF hUpSs1Y91iQQ7KItirz5uwCPlwejSJDQJLIS+QtJHaXDXeV6NI0Uef1hP20+y8qydDiVkv6l IreXjTb7DvksRgJNvCkWtYnlS3mYvQ9NzS9PhyALWbXnH6sIJd2O9lKS1Mrfq+y0IXCP10eS FFGg+Av3IQeFatkJAyju0PPthyTqxSI4lZYuJVPknzgaeuJv/2NccrPvmeDg6Coe7ZIeQ8Yj t0ARxu2xytAkkLCel1Lz1WLmwLstV30g80nkgZf/wr+/BXJW/oIvRlonUkxv+IbBM3dX2OV8 AmRv1ySWPTP7AAMFB/9PQK/VtlNUJvg8GXj9ootzrteGfVZVVT4XBJkfwBcpC/XcPzldjv+3 HYudvpdNK3lLujXeA5fLOH+Z/G9WBc5pFVSMocI71I8bT8lIAzreg0WvkWg5V2WZsUMlnDL9 mpwIGFhlbM3gfDMs7MPMu8YQRFVdUvtSpaAs8OFfGQ0ia3LGZcjA6Ik2+xcqscEJzNH+qh8V m5jjp28yZgaqTaRbg3M/+MTbMpicpZuqF4rnB0AQD12/3BNWDR6bmh+EkYSMcEIpQmBM51qM EKYTQGybRCjpnKHGOxG0rfFY1085mBDZCH5Kx0cl0HVJuQKC+dV2ZY5AqjcKwAxpE75MLFkr wkkEGBECAAkFAlk3nEQCGwwACgkQoDSui/t3IH7nnwCfcJWUDUFKdCsBH/E5d+0ZnMQi+G0A nAuWpQkjM1ASeQwSHEeAWPgskBQL
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Mon, 24 Nov 2025 15:02:09 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

EVEX.W meaning is unusual for VBMAC{,X}OR16x16x16, but that needs taking
care of only in the test harness.

Like already proposed in "x86emul: support AVX10.1", use just
vcpu_must_have(), not host_and_vcpu_must_have().

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
The Disp8Shift settings are guesswork; the binutils submission bogusly(?)
suggests no scaling at all.

No idea how to test this without having access to capable hardware. AMD,
to my knowledge, offers no equivalent to Intel's SDE.
---
v9: New.

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -689,6 +689,15 @@ static const struct test avx512_fp16_128
     INSN(movw, 66, map5, 7e, el, fp16, el),
 };
 
+static const struct test avx512_bmm_all[] = {
+    INSN(bitrev,   , map6, 81, vl, b, vl),
+};
+
+static const struct test avx512_bmm_no128[] = {
+    INSN(bmacor16x16x16,    , map6, 80, vl, w, vl), /* EVEX.W0 */
+    INSN(bmacxor16x16x16,   , map6, 80, vl, w, vl), /* EVEX.W1 */
+};
+
 static const struct test gfni_all[] = {
     INSN(gf2p8affineinvqb, 66, 0f3a, cf, vl, q, vl),
     INSN(gf2p8affineqb,    66, 0f3a, ce, vl, q, vl),
@@ -817,6 +826,12 @@ static void test_one(const struct test *
 
     case ESZ_w:
         evex.w = 1;
+        /*
+         * VBMAC{,X}OR16x16x16 don't follow the general pattern: EVEX.W
+         * controls reduction kind there, not element size.
+         */
+        if ( test->spc == SPC_map6 && !test->pfx && test->opc == 0x80 )
+            evex.w = test->mnemonic[4] == 'x';
         /* fall through */
     case ESZ_fp16:
         esz = 2;
@@ -1087,6 +1102,8 @@ void evex_disp8_test(void *instr, struct
     RUN(avx512_vpopcntdq, all);
     RUN(avx512_fp16, all);
     RUN(avx512_fp16, 128);
+    RUN(avx512_bmm, all);
+    RUN(avx512_bmm, no128);
 
     if ( cpu_has_avx512f )
     {
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2157,6 +2157,9 @@ static const struct evex {
     { { 0x56 }, 2, T, R, pfx_f2, W0, Ln }, /* vfcmaddcph */
     { { 0x57 }, 2, T, R, pfx_f3, W0, LIG }, /* vfmaddcsh */
     { { 0x57 }, 2, T, R, pfx_f2, W0, LIG }, /* vfcmaddcsh */
+    { { 0x80 }, 2, T, R, pfx_no, W0, L1 | L2 }, /* vbmacor16x16x16 */
+    { { 0x80 }, 2, T, R, pfx_no, W1, L1 | L2 }, /* vbmacxor16x16x16 */
+    { { 0x81 }, 2, T, R, pfx_no, W0, Ln }, /* vbitrev */
     { { 0x96 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub132ph */
     { { 0x97 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd132ph */
     { { 0x98 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd132ph */
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -223,6 +223,8 @@ void wrpkru(unsigned int val);
 #define cpu_has_xop                 (cpu_policy.extd.xop  && xcr0_mask(6))
 #define cpu_has_fma4                (cpu_policy.extd.fma4 && xcr0_mask(6))
 #define cpu_has_tbm                  cpu_policy.extd.tbm
+#define cpu_has_avx512_bmm          (cpu_policy.extd.avx512_bmm && \
+                                     xcr0_mask(0xe6))
 
 int emul_test_cpuid(
     uint32_t leaf,
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -334,6 +334,7 @@ XEN_CPUFEATURE(CPUID_USER_DIS,     11*32
 XEN_CPUFEATURE(EPSF,               11*32+18) /*A  Enhanced Predictive Store Forwarding */
 XEN_CPUFEATURE(FSRSC,              11*32+19) /*A  Fast Short REP SCASB */
 XEN_CPUFEATURE(AMD_PREFETCHI,      11*32+20) /*A  PREFETCHIT{0,1} Instructions */
+XEN_CPUFEATURE(AVX512_BMM,         11*32+23) /*a  AVX512 Bitmap Manipulation Instructions */
 XEN_CPUFEATURE(SBPB,               11*32+27) /*A  Selective Branch Predictor Barrier */
 XEN_CPUFEATURE(IBPB_BRTYPE,        11*32+28) /*A  IBPB flushes Branch Type predictions too */
 XEN_CPUFEATURE(SRSO_NO,            11*32+29) /*A  Hardware not vulnerable to Speculative Return Stack Overflow */
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -308,7 +308,7 @@ def crunch_numbers(state):
         # dependents of AVX512BW (as to requiring wider than 16-bit mask
         # registers), despite the SDM not formally making this connection.
         AVX512BW: [AVX512_VBMI, AVX512_VBMI2, AVX512_BITALG, AVX512_BF16,
-                   AVX512_FP16],
+                   AVX512_FP16, AVX512_BMM],
 
         # Extensions with VEX/EVEX encodings keyed to a separate feature
         # flag are made dependents of their respective legacy feature.
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -387,6 +387,7 @@ static const struct ext0f38_table {
     [0x7a ... 0x7c] = { .simd_size = simd_none, .two_op = 1 },
     [0x7d ... 0x7e] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x7f] = { .simd_size = simd_packed_fp, .d8s = d8s_vl },
+    [0x80 ... 0x81] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x82] = { .simd_size = simd_other },
     [0x83] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
     [0x88] = { .simd_size = simd_packed_fp, .two_op = 1, .d8s = d8s_dq },
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -568,6 +568,7 @@ amd_like(const struct x86_emulate_ctxt *
 #define vcpu_has_clzero()      (ctxt->cpuid->extd.clzero)
 #define vcpu_has_wbnoinvd()    (ctxt->cpuid->extd.wbnoinvd)
 #define vcpu_has_nscb()        (ctxt->cpuid->extd.nscb)
+#define vcpu_has_avx512_bmm()  (ctxt->cpuid->extd.avx512_bmm)
 
 #define vcpu_has_bmi1()        (ctxt->cpuid->feat.bmi1)
 #define vcpu_has_hle()         (ctxt->cpuid->feat.hle)
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -7990,6 +7990,19 @@ x86_emulate(
         goto simd_zmm;
     }
 
+    case X86EMUL_OPC_EVEX(6, 0x80): /* vbmac{,x}or16x16x16 [xyz]mm/mem,[xyz]mm,[xyz]mm */
+        vcpu_must_have(avx512_bmm);
+        generate_exception_if(!evex.lr || evex.brs || evex.opmsk, X86_EXC_UD);
+        avx512_vlen_check(false);
+        goto simd_zmm;
+
+    case X86EMUL_OPC_EVEX(6, 0x81): /* vbitrev [xyz]mm/mem,[xyz]mm */
+        vcpu_must_have(avx512_bmm);
+        generate_exception_if(evex.w || evex.brs || evex.reg != 0xf || !evex.RX,
+                              X86_EXC_UD);
+        avx512_vlen_check(false);
+        goto simd_zmm;
+
     case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.