[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 02/10] x86emul: handle AVX512-FP16 Map5 arithmetic insns


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Mon, 3 Apr 2023 16:57:25 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=ssUjHs4HRn1b92pvEP2D8LrncOpBCibcR/DS6rD4XMw=; b=BrQX8T1/bUH8Xy6WLQ/2NC62yROU8KjcQ9L/LAJtMhbEeqs6NNLPfZbLVVvPpOWAIjv+A5bAtgMblDnQYMhhQoY4FeaE1pgdzAuIUPbxQj6z0h8M4MAsiwvS3ZE+n30xAKBkURPkRbMSjWDmNppuW9zSP7pwAJh6oqK91nH8QqgB33Zp74E21P6dSUPoGTuHbNtAnQBlNwT3C2jiwOC062XtXFb3v0k2iSOlxXHf44sEhwFoS2DWkjcytPmzsakTeZJjANc1Pu+UGFQKDzE2//A2jkWE75WnO67vJjjvsYamHjOQQUTmbL8lTH22hmdi6etigxy4cAWgZZ8V8OAHtA==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=UMX/oiOvfeZBVNHcbouIPbM3AbxdRxjiOtL9x3JLGk1sZoQ9BtjJpE5Rr9HDWw8HphwMd/jYD22Udb1uZTT9LHQPHCKcDFGkW9GZrrPBEJOye87zWjTSqaMi4ML3mO3WGkkNRDojBNk9klh8o5XBpdipMkqKaBjUoJ3psIxWLfCclwgMBSCWf0m2AJOx3lQQ03busDcU4sysBO3Pm3dXGgIkUWiJzqBGIbWjfqV/ZPGjxIyqveHBe3pWZyeBJ0wr1Y1ETOz5ewAsSxVQXM4aKUmKb3bawOvzlzUa+HMGTiUuV6z1+1K6u1HDY7OKR/sCSD2Umt72z49UNypny8A4Tg==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Mon, 03 Apr 2023 14:57:42 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

This encoding space is a very sparse clone of the "twobyte" one. Re-use
that table, as the entries corresponding to invalid opcodes in Map5 are
simply benign with simd_size forced to other than simd_none (preventing
undue memory reads in SrcMem handling early in x86_emulate()).

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v2: Add comments.

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -6,7 +6,7 @@
 struct test {
     const char *mnemonic;
     unsigned int opc:8;
-    unsigned int spc:2;
+    unsigned int spc:3;
     unsigned int pfx:2;
     unsigned int vsz:3;
     unsigned int esz:4;
@@ -19,6 +19,10 @@ enum spc {
     SPC_0f,
     SPC_0f38,
     SPC_0f3a,
+    SPC_unused4,
+    SPC_map5,
+    SPC_map6,
+    SPC_unused7,
 };
 
 enum pfx {
@@ -603,16 +607,32 @@ static const struct test avx512_vpopcntd
 };
 
 static const struct test avx512_fp16_all[] = {
+    INSN(addph,           , map5, 58,    vl, fp16, vl),
+    INSN(addsh,         f3, map5, 58,    el, fp16, el),
     INSN(cmpph,           , 0f3a, c2,    vl, fp16, vl),
     INSN(cmpsh,         f3, 0f3a, c2,    el, fp16, el),
+    INSN(comish,          , map5, 2f,    el, fp16, el),
+    INSN(divph,           , map5, 5e,    vl, fp16, vl),
+    INSN(divsh,         f3, map5, 5e,    el, fp16, el),
     INSN(fpclassph,       , 0f3a, 66,    vl, fp16, vl),
     INSN(fpclasssh,       , 0f3a, 67,    el, fp16, el),
     INSN(getmantph,       , 0f3a, 26,    vl, fp16, vl),
     INSN(getmantsh,       , 0f3a, 27,    el, fp16, el),
+    INSN(maxph,           , map5, 5f,    vl, fp16, vl),
+    INSN(maxsh,         f3, map5, 5f,    el, fp16, el),
+    INSN(minph,           , map5, 5d,    vl, fp16, vl),
+    INSN(minsh,         f3, map5, 5d,    el, fp16, el),
+    INSN(mulph,           , map5, 59,    vl, fp16, vl),
+    INSN(mulsh,         f3, map5, 59,    el, fp16, el),
     INSN(reduceph,        , 0f3a, 56,    vl, fp16, vl),
     INSN(reducesh,        , 0f3a, 57,    el, fp16, el),
     INSN(rndscaleph,      , 0f3a, 08,    vl, fp16, vl),
     INSN(rndscalesh,      , 0f3a, 0a,    el, fp16, el),
+    INSN(sqrtph,          , map5, 51,    vl, fp16, vl),
+    INSN(sqrtsh,        f3, map5, 51,    el, fp16, el),
+    INSN(subph,           , map5, 5c,    vl, fp16, vl),
+    INSN(subsh,         f3, map5, 5c,    el, fp16, el),
+    INSN(ucomish,         , map5, 2e,    el, fp16, el),
 };
 
 static const struct test gfni_all[] = {
@@ -713,8 +733,8 @@ static void test_one(const struct test *
     union evex {
         uint8_t raw[3];
         struct {
-            uint8_t opcx:2;
-            uint8_t mbz:2;
+            uint8_t opcx:3;
+            uint8_t mbz:1;
             uint8_t R:1;
             uint8_t b:1;
             uint8_t x:1;
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2028,6 +2028,23 @@ static const struct evex {
     { { 0xc2 }, 3, T, R, pfx_f3, W0, LIG }, /* vcmpsh */
     { { 0xce }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineqb */
     { { 0xcf }, 3, T, R, pfx_66, W1, Ln }, /* vgf2p8affineinvqb */
+}, evex_map5[] = {
+    { { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomish */
+    { { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomish */
+    { { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtph */
+    { { 0x51 }, 2, T, R, pfx_f3, W0, LIG }, /* vsqrtsh */
+    { { 0x58 }, 2, T, R, pfx_no, W0, Ln }, /* vaddph */
+    { { 0x58 }, 2, T, R, pfx_f3, W0, LIG }, /* vaddsh */
+    { { 0x59 }, 2, T, R, pfx_no, W0, Ln }, /* vmulph */
+    { { 0x59 }, 2, T, R, pfx_f3, W0, LIG }, /* vmulsh */
+    { { 0x5c }, 2, T, R, pfx_no, W0, Ln }, /* vsubph */
+    { { 0x5c }, 2, T, R, pfx_f3, W0, LIG }, /* vsubsh */
+    { { 0x5d }, 2, T, R, pfx_no, W0, Ln }, /* vminph */
+    { { 0x5d }, 2, T, R, pfx_f3, W0, LIG }, /* vminsh */
+    { { 0x5e }, 2, T, R, pfx_no, W0, Ln }, /* vdivph */
+    { { 0x5e }, 2, T, R, pfx_f3, W0, LIG }, /* vdivsh */
+    { { 0x5f }, 2, T, R, pfx_no, W0, Ln }, /* vmaxph */
+    { { 0x5f }, 2, T, R, pfx_f3, W0, LIG }, /* vmaxsh */
 };
 
 static const struct {
@@ -2037,6 +2054,8 @@ static const struct {
     { evex_0f,   ARRAY_SIZE(evex_0f) },
     { evex_0f38, ARRAY_SIZE(evex_0f38) },
     { evex_0f3a, ARRAY_SIZE(evex_0f3a) },
+    { NULL,      0 },
+    { evex_map5, ARRAY_SIZE(evex_map5) },
 };
 
 #undef Wn
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -1219,9 +1219,22 @@ int x86emul_decode(struct x86_emulate_st
                         opcode |= MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK);
                         d = twobyte_table[0x3a].desc;
                         break;
+
+                    case evex_map5:
+                        if ( !evex_encoded() )
+                        {
                     default:
-                        rc = X86EMUL_UNRECOGNIZED;
-                        goto done;
+                            rc = X86EMUL_UNRECOGNIZED;
+                            goto done;
+                        }
+                        opcode |= MASK_INSR(5, X86EMUL_OPC_EXT_MASK);
+                        /*
+                         * Re-use twobyte_table[] here, for the similarity of
+                         * the entries valid in map 5.
+                         */
+                        d = twobyte_table[b].desc;
+                        s->simd_size = twobyte_table[b].size ?: simd_other;
+                        break;
                     }
                 }
                 else if ( s->ext < ext_8f08 + ARRAY_SIZE(xop_table) )
@@ -1443,6 +1456,25 @@ int x86emul_decode(struct x86_emulate_st
             }
             break;
 
+        case ext_map5:
+            switch ( b )
+            {
+            default:
+                if ( !(s->evex.pfx & VEX_PREFIX_DOUBLE_MASK) )
+                    s->fp16 = true;
+                break;
+
+            case 0x2e: case 0x2f: /* v{,u}comish */
+                if ( !s->evex.pfx )
+                    s->fp16 = true;
+                s->simd_size = simd_none;
+                break;
+            }
+
+            /* Like above re-use twobyte_table[] here. */
+            disp8scale = decode_disp8scale(twobyte_table[b].d8s, s);
+            break;
+
         case ext_8f09:
             if ( ext8f09_table[b].two_op )
                 d |= TwoOp;
@@ -1661,6 +1693,7 @@ int x86emul_decode(struct x86_emulate_st
         s->simd_size = ext8f08_table[b].simd_size;
         break;
 
+    case ext_map5:
     case ext_8f09:
     case ext_8f0a:
         break;
--- a/xen/arch/x86/x86_emulate/private.h
+++ b/xen/arch/x86/x86_emulate/private.h
@@ -195,6 +195,7 @@ enum vex_opcx {
     vex_0f = vex_none + 1,
     vex_0f38,
     vex_0f3a,
+    evex_map5 = 5,
 };
 
 enum vex_pfx {
@@ -223,8 +224,8 @@ union vex {
 union evex {
     uint8_t raw[3];
     struct {             /* SDM names */
-        uint8_t opcx:2;  /* mm */
-        uint8_t mbz:2;
+        uint8_t opcx:3;  /* mmm */
+        uint8_t mbz:1;
         uint8_t R:1;     /* R' */
         uint8_t b:1;     /* B */
         uint8_t x:1;     /* X */
@@ -249,6 +250,7 @@ struct x86_emulate_state {
         ext_0f   = vex_0f,
         ext_0f38 = vex_0f38,
         ext_0f3a = vex_0f3a,
+        ext_map5 = evex_map5,
         /*
          * For XOP use values such that the respective instruction field
          * can be used without adjustment.
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -3756,6 +3756,13 @@ x86_emulate(
         ASSERT(!state->simd_size);
         break;
 
+#ifndef X86EMUL_NO_SIMD
+
+    case X86EMUL_OPC_EVEX(5, 0x2e): /* vucomish xmm/m16,xmm */
+    case X86EMUL_OPC_EVEX(5, 0x2f): /* vcomish xmm/m16,xmm */
+        host_and_vcpu_must_have(avx512_fp16);
+        generate_exception_if(evex.w, EXC_UD);
+        /* fall through */
     CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */
     CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2f): /* vcomis{s,d} xmm/mem,xmm */
         generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk ||
@@ -3768,9 +3775,11 @@ x86_emulate(
         get_fpu(X86EMUL_FPU_zmm);
 
         opc = init_evex(stub);
-        op_bytes = 4 << evex.w;
+        op_bytes = 2 << (!state->fp16 + evex.w);
         goto vcomi;
 
+#endif
+
     case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */
         generate_exception_if(!mode_ring0(), EXC_GP, 0);
         fail_if(ops->write_msr == NULL);
@@ -7736,6 +7745,20 @@ x86_emulate(
 
 #ifndef X86EMUL_NO_SIMD
 
+    case X86EMUL_OPC_EVEX_F3(5, 0x51):   /* vsqrtsh xmm/m16,xmm,xmm{k} */
+        d &= ~TwoOp;
+        /* fall through */
+    case X86EMUL_OPC_EVEX(5, 0x51):      /* vsqrtph [xyz]mm/mem,[xyz]mm{k} */
+    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x58): /* vadd{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x59): /* vmul{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5c): /* vsub{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5d): /* vmin{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5e): /* vdiv{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+    CASE_SIMD_SINGLE_FP(_EVEX, 5, 0x5f): /* vmax{p,s}h [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+        host_and_vcpu_must_have(avx512_fp16);
+        generate_exception_if(evex.w, EXC_UD);
+        goto avx512f_all_fp;
+
     case X86EMUL_OPC_XOP(08, 0x85): /* vpmacssww xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x86): /* vpmacsswd xmm,xmm/m128,xmm,xmm */
     case X86EMUL_OPC_XOP(08, 0x87): /* vpmacssdql xmm,xmm/m128,xmm,xmm */
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -619,6 +619,7 @@ struct x86_emulate_ctxt
  *    0x0fxxxx for 0f-prefixed opcodes (or their VEX/EVEX equivalents)
  *  0x0f38xxxx for 0f38-prefixed opcodes (or their VEX/EVEX equivalents)
  *  0x0f3axxxx for 0f3a-prefixed opcodes (or their VEX/EVEX equivalents)
+ *     0x5xxxx for Map5 opcodes (EVEX only)
  *  0x8f08xxxx for 8f/8-prefixed XOP opcodes
  *  0x8f09xxxx for 8f/9-prefixed XOP opcodes
  *  0x8f0axxxx for 8f/a-prefixed XOP opcodes




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.