[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 04/10] x86emul: handle AVX512-FP16 fma-like insns


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Mon, 3 Apr 2023 16:58:13 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=Oydr5RGLPiB/5UlaPCHWJTbrdc3fnjsbwu/jwCLiWIo=; b=TE+BkxGv3HX7deLzpjZOcZcPg9YOK3PVZ6EUVyuFgFy0+SnJdyPF0o8QyLYt5cOU2NGFNaRfrGMX4BrBHVpqeJwkZWrMaDqPShld/A9ZzGdKp3T3XZQ7V6rWqr39+7xWRQ3HAHieWoOlSKURUEIJrB5Rq9o7YgzT6tqe6hA2l8BFJTwji4X1vuh6RuVT70Nq1euktvc1xC0IaSgClJaS8OxyyCJAxndc+NzeEfHSn9fQ85FfMPGXgzh6dk3pbLDyikLMp4UBc9m7S5p/e8zO+6cZeI/YUaprycb9DK+QeFOnEm9y7AALs4AORHzD3+7O/0lEYZlDvPRFAgS+9fmFdw==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=gqnEFc9p304BbjCR28meVw3Cgt/JGP1KNLoUbKnVLacoWdsNP90bat1sFpAujaGNvQaaCSGYPqp0dMf+GVAz3TyZ6ktSAgM8ZAmqPrDNA7FC10nB7P1CAjR4kIaaPD2Ej0z9sUrm0G0eRWz/cN1MI2u1cDcVBbMZX3NdGQNsjYQDZn6F4yu4RGWX8SyKeiwHoif3JZ1JgQF4B3Etd3BZyn/IaiwoFlUx69uMxIRkZt4/M/9EJFrEwBlyt3QCQFBhChrM2HNon4x//ER/GLvcxlN84XLsaMwqa1l2IVBzi6NClJNxefrTWEtAuLIhfmD/sILk+SfRBpobGNzVyjoYNw==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Mon, 03 Apr 2023 14:58:29 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

The Map6 encoding space is a very sparse clone of the "0f38" one. Once
again re-use that table: the entries which correspond to invalid opcodes
in Map6 are benign as long as simd_size is forced to something other than
simd_none (which prevents undue memory reads in the SrcMem handling early
in x86_emulate()).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v2: Add comments.

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -614,6 +614,36 @@ static const struct test avx512_fp16_all
     INSN(comish,          , map5, 2f,    el, fp16, el),
     INSN(divph,           , map5, 5e,    vl, fp16, vl),
     INSN(divsh,         f3, map5, 5e,    el, fp16, el),
+    INSN(fmadd132ph,    66, map6, 98,    vl, fp16, vl),
+    INSN(fmadd132sh,    66, map6, 99,    el, fp16, el),
+    INSN(fmadd213ph,    66, map6, a8,    vl, fp16, vl),
+    INSN(fmadd213sh,    66, map6, a9,    el, fp16, el),
+    INSN(fmadd231ph,    66, map6, b8,    vl, fp16, vl),
+    INSN(fmadd231sh,    66, map6, b9,    el, fp16, el),
+    INSN(fmaddsub132ph, 66, map6, 96,    vl, fp16, vl),
+    INSN(fmaddsub213ph, 66, map6, a6,    vl, fp16, vl),
+    INSN(fmaddsub231ph, 66, map6, b6,    vl, fp16, vl),
+    INSN(fmsub132ph,    66, map6, 9a,    vl, fp16, vl),
+    INSN(fmsub132sh,    66, map6, 9b,    el, fp16, el),
+    INSN(fmsub213ph,    66, map6, aa,    vl, fp16, vl),
+    INSN(fmsub213sh,    66, map6, ab,    el, fp16, el),
+    INSN(fmsub231ph,    66, map6, ba,    vl, fp16, vl),
+    INSN(fmsub231sh,    66, map6, bb,    el, fp16, el),
+    INSN(fmsubadd132ph, 66, map6, 97,    vl, fp16, vl),
+    INSN(fmsubadd213ph, 66, map6, a7,    vl, fp16, vl),
+    INSN(fmsubadd231ph, 66, map6, b7,    vl, fp16, vl),
+    INSN(fnmadd132ph,   66, map6, 9c,    vl, fp16, vl),
+    INSN(fnmadd132sh,   66, map6, 9d,    el, fp16, el),
+    INSN(fnmadd213ph,   66, map6, ac,    vl, fp16, vl),
+    INSN(fnmadd213sh,   66, map6, ad,    el, fp16, el),
+    INSN(fnmadd231ph,   66, map6, bc,    vl, fp16, vl),
+    INSN(fnmadd231sh,   66, map6, bd,    el, fp16, el),
+    INSN(fnmsub132ph,   66, map6, 9e,    vl, fp16, vl),
+    INSN(fnmsub132sh,   66, map6, 9f,    el, fp16, el),
+    INSN(fnmsub213ph,   66, map6, ae,    vl, fp16, vl),
+    INSN(fnmsub213sh,   66, map6, af,    el, fp16, el),
+    INSN(fnmsub231ph,   66, map6, be,    vl, fp16, vl),
+    INSN(fnmsub231sh,   66, map6, bf,    el, fp16, el),
     INSN(fpclassph,       , 0f3a, 66,    vl, fp16, vl),
     INSN(fpclasssh,       , 0f3a, 67,    el, fp16, el),
     INSN(getmantph,       , 0f3a, 26,    vl, fp16, vl),
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2049,6 +2049,37 @@ static const struct evex {
     { { 0x5f }, 2, T, R, pfx_f3, W0, LIG }, /* vmaxsh */
     { { 0x6e }, 2, T, R, pfx_66, WIG, L0 }, /* vmovw */
     { { 0x7e }, 2, T, W, pfx_66, WIG, L0 }, /* vmovw */
+}, evex_map6[] = {
+    { { 0x96 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub132ph */
+    { { 0x97 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd132ph */
+    { { 0x98 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd132ph */
+    { { 0x99 }, 2, T, R, pfx_66, W0, LIG }, /* vfmadd132sh */
+    { { 0x9a }, 2, T, R, pfx_66, W0, Ln }, /* vfmsub132ph */
+    { { 0x9b }, 2, T, R, pfx_66, W0, LIG }, /* vfmsub132sh */
+    { { 0x9c }, 2, T, R, pfx_66, W0, Ln }, /* vfnmadd132ph */
+    { { 0x9d }, 2, T, R, pfx_66, W0, LIG }, /* vfnmadd132sh */
+    { { 0x9e }, 2, T, R, pfx_66, W0, Ln }, /* vfnmsub132ph */
+    { { 0x9f }, 2, T, R, pfx_66, W0, LIG }, /* vfnmsub132sh */
+    { { 0xa6 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub213ph */
+    { { 0xa7 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd213ph */
+    { { 0xa8 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd213ph */
+    { { 0xa9 }, 2, T, R, pfx_66, W0, LIG }, /* vfmadd213sh */
+    { { 0xaa }, 2, T, R, pfx_66, W0, Ln }, /* vfmsub213ph */
+    { { 0xab }, 2, T, R, pfx_66, W0, LIG }, /* vfmsub213sh */
+    { { 0xac }, 2, T, R, pfx_66, W0, Ln }, /* vfnmadd213ph */
+    { { 0xad }, 2, T, R, pfx_66, W0, LIG }, /* vfnmadd213sh */
+    { { 0xae }, 2, T, R, pfx_66, W0, Ln }, /* vfnmsub213ph */
+    { { 0xaf }, 2, T, R, pfx_66, W0, LIG }, /* vfnmsub213sh */
+    { { 0xb6 }, 2, T, R, pfx_66, W0, Ln }, /* vfmaddsub231ph */
+    { { 0xb7 }, 2, T, R, pfx_66, W0, Ln }, /* vfmsubadd231ph */
+    { { 0xb8 }, 2, T, R, pfx_66, W0, Ln }, /* vfmadd231ph */
+    { { 0xb9 }, 2, T, R, pfx_66, W0, LIG }, /* vfmadd231sh */
+    { { 0xba }, 2, T, R, pfx_66, W0, Ln }, /* vfmsub231ph */
+    { { 0xbb }, 2, T, R, pfx_66, W0, LIG }, /* vfmsub231sh */
+    { { 0xbc }, 2, T, R, pfx_66, W0, Ln }, /* vfnmadd231ph */
+    { { 0xbd }, 2, T, R, pfx_66, W0, LIG }, /* vfnmadd231sh */
+    { { 0xbe }, 2, T, R, pfx_66, W0, Ln }, /* vfnmsub231ph */
+    { { 0xbf }, 2, T, R, pfx_66, W0, LIG }, /* vfnmsub231sh */
 };
 
 static const struct {
@@ -2060,6 +2091,7 @@ static const struct {
     { evex_0f3a, ARRAY_SIZE(evex_0f3a) },
     { NULL,      0 },
     { evex_map5, ARRAY_SIZE(evex_map5) },
+    { evex_map6, ARRAY_SIZE(evex_map6) },
 };
 
 #undef Wn
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -1235,6 +1235,20 @@ int x86emul_decode(struct x86_emulate_st
                         d = twobyte_table[b].desc;
                         s->simd_size = twobyte_table[b].size ?: simd_other;
                         break;
+
+                    case evex_map6:
+                        if ( !evex_encoded() )
+                        {
+                            rc = X86EMUL_UNRECOGNIZED;
+                            goto done;
+                        }
+                        opcode |= MASK_INSR(6, X86EMUL_OPC_EXT_MASK);
+                        /*
+                         * Re-use twobyte_table[]'s 0x38 entry here, for the
+                         * similarity of the 0F38 entries with map 6.
+                         */
+                        d = twobyte_table[0x38].desc;
+                        break;
                     }
                 }
                 else if ( s->ext < ext_8f08 + ARRAY_SIZE(xop_table) )
@@ -1484,6 +1498,28 @@ int x86emul_decode(struct x86_emulate_st
             disp8scale = decode_disp8scale(twobyte_table[b].d8s, s);
             break;
 
+        case ext_map6:
+            /*
+             * Re-use ext0f38_table[] here, for the similarity of the entries
+             * valid in map 6.
+             */
+            d = ext0f38_table[b].to_mem ? DstMem | SrcReg
+                                        : DstReg | SrcMem;
+            if ( ext0f38_table[b].two_op )
+                d |= TwoOp;
+            s->simd_size = ext0f38_table[b].simd_size ?: simd_other;
+
+            switch ( b )
+            {
+            default:
+                if ( s->evex.pfx == vex_66 )
+                    s->fp16 = true;
+                break;
+            }
+
+            disp8scale = decode_disp8scale(ext0f38_table[b].d8s, s);
+            break;
+
         case ext_8f09:
             if ( ext8f09_table[b].two_op )
                 d |= TwoOp;
@@ -1703,6 +1739,7 @@ int x86emul_decode(struct x86_emulate_st
         break;
 
     case ext_map5:
+    case ext_map6:
     case ext_8f09:
     case ext_8f0a:
         break;
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -196,6 +196,7 @@ enum vex_opcx {
     vex_0f38,
     vex_0f3a,
     evex_map5 = 5,
+    evex_map6,
 };
 
 enum vex_pfx {
@@ -251,6 +252,7 @@ struct x86_emulate_state {
         ext_0f38 = vex_0f38,
         ext_0f3a = vex_0f3a,
         ext_map5 = evex_map5,
+        ext_map6 = evex_map6,
         /*
          * For XOP use values such that the respective instruction field
          * can be used without adjustment.
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -7778,6 +7778,49 @@ x86_emulate(
         generate_exception_if(evex.w, EXC_UD);
         goto avx512f_all_fp;
 
+    case X86EMUL_OPC_EVEX_66(6, 0x96): /* vfmaddsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x97): /* vfmsubadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x98): /* vfmadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x9a): /* vfmsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x9c): /* vfnmadd132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x9e): /* vfnmsub132ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xa6): /* vfmaddsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xa7): /* vfmsubadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xa8): /* vfmadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xaa): /* vfmsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xac): /* vfnmadd213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xae): /* vfnmsub213ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xb6): /* vfmaddsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xb7): /* vfmsubadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xb8): /* vfmadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xba): /* vfmsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xbc): /* vfnmadd231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xbe): /* vfnmsub231ph [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512_fp16);
+        generate_exception_if(evex.w, EXC_UD);
+        if ( ea.type != OP_REG || !evex.brs )
+            avx512_vlen_check(false);
+        goto simd_zmm;
+
+    case X86EMUL_OPC_EVEX_66(6, 0x99): /* vfmadd132sh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x9b): /* vfmsub132sh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x9d): /* vfnmadd132sh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0x9f): /* vfnmsub132sh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xa9): /* vfmadd213sh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xab): /* vfmsub213sh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xad): /* vfnmadd213sh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xaf): /* vfnmsub213sh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xb9): /* vfmadd231sh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xbb): /* vfmsub231sh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xbd): /* vfnmadd231sh xmm/m16,xmm,xmm{k} */
+    case X86EMUL_OPC_EVEX_66(6, 0xbf): /* vfnmsub231sh xmm/m16,xmm,xmm{k} */
+        host_and_vcpu_must_have(avx512_fp16);
+        generate_exception_if(evex.w || (ea.type != OP_REG && evex.brs),
+                              EXC_UD);
+        if ( !evex.brs )
+            avx512_vlen_check(true);
+        goto simd_zmm;
+
     case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -620,6 +620,7 @@ struct x86_emulate_ctxt
  *  0x0f38xxxx for 0f38-prefixed opcodes (or their VEX/EVEX equivalents)
  *  0x0f3axxxx for 0f3a-prefixed opcodes (or their VEX/EVEX equivalents)
  *     0x5xxxx for Map5 opcodes (EVEX only)
+ *     0x6xxxx for Map6 opcodes (EVEX only)
  *  0x8f08xxxx for 8f/8-prefixed XOP opcodes
  *  0x8f09xxxx for 8f/9-prefixed XOP opcodes
  *  0x8f0axxxx for 8f/a-prefixed XOP opcodes




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.