[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 14/18] x86emul: support SSE4.1 insns



... and their AVX equivalents.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v3: New.

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -214,6 +214,12 @@ enum simd_opsize {
      * - 32 bits with prefix F3 (scalar)
      */
     simd_single_fp,
+    /*
+     * Scalar floating point:
+     * - 32 bits with low opcode bit clear (scalar single)
+     * - 64 bits with low opcode bit set (scalar double)
+     */
+    simd_scalar_fp,
     /* Operand size encoded in non-standard way. */
     simd_other
 };
@@ -344,21 +350,45 @@ static const struct {
     uint8_t vsib:1;
 } ext0f38_table[256] = {
     [0x00 ... 0x0b] = { .simd_size = simd_packed_int },
+    [0x10] = { .simd_size = simd_packed_int },
+    [0x14 ... 0x15] = { .simd_size = simd_packed_fp },
+    [0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0x1c ... 0x1e] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0x20 ... 0x25] = { .simd_size = simd_other, .two_op = 1 },
+    [0x28 ... 0x29] = { .simd_size = simd_packed_int },
     [0x2a] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0x2b] = { .simd_size = simd_packed_int },
+    [0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
+    [0x38 ... 0x3f] = { .simd_size = simd_packed_int },
+    [0x40] = { .simd_size = simd_packed_int },
+    [0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0xf0] = { .two_op = 1 },
     [0xf1] = { .to_memory = 1, .two_op = 1 },
     [0xf2 ... 0xf3] = {},
     [0xf5 ... 0xf7] = {},
 };
 
+/* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
+static const uint8_t pmov_convert_delta[] = { 1, 2, 3, 1, 2, 1 };
+
 static const struct {
     uint8_t simd_size:5;
     uint8_t to_memory:1;
     uint8_t two_op:1;
     uint8_t four_op:1;
 } ext0f3a_table[256] = {
-    [0x0f] = { .simd_size = simd_packed_int },
+    [0x08 ... 0x09] = { .simd_size = simd_packed_fp, .two_op = 1 },
+    [0x0a ... 0x0b] = { .simd_size = simd_scalar_fp },
+    [0x0c ... 0x0d] = { .simd_size = simd_packed_fp },
+    [0x0e ... 0x0f] = { .simd_size = simd_packed_int },
+    [0x14 ... 0x17] = { .simd_size = simd_none, .to_memory = 1, .two_op = 1 },
+    [0x20] = { .simd_size = simd_none },
+    [0x21] = { .simd_size = simd_other },
+    [0x22] = { .simd_size = simd_none },
+    [0x40 ... 0x41] = { .simd_size = simd_packed_fp },
+    [0x42] = { .simd_size = simd_packed_int },
+    [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
     [0xf0] = {},
 };
 
@@ -2308,6 +2338,33 @@ x86_decode_0f38(
 }
 
 static int
+x86_decode_0f3a(
+    struct x86_emulate_state *state,
+    struct x86_emulate_ctxt *ctxt,
+    const struct x86_emulate_ops *ops)
+{
+    if ( !vex.opcx )
+        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
+
+    switch ( ctxt->opcode & X86EMUL_OPC_MASK )
+    {
+    case X86EMUL_OPC_66(0, 0x20):     /* pinsrb */
+    case X86EMUL_OPC_VEX_66(0, 0x20): /* vpinsrb */
+        state->desc = DstImplicit | SrcMem;
+        if ( modrm_mod != 3 )
+            state->desc = ByteOp;
+        break;
+
+    case X86EMUL_OPC_66(0, 0x22):     /* pinsr{d,q} */
+    case X86EMUL_OPC_VEX_66(0, 0x22): /* vpinsr{d,q} */
+        state->desc = DstImplicit | SrcMem;
+        break;
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static int
 x86_decode(
     struct x86_emulate_state *state,
     struct x86_emulate_ctxt *ctxt,
@@ -2795,8 +2852,7 @@ x86_decode(
             imm1 &= 0x7f;
         state->desc = d;
         state->simd_size = ext0f3a_table[b].simd_size;
-        if ( !vex.opcx )
-            ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
+        rc = x86_decode_0f3a(state, ctxt, ops);
         break;
 
     case ext_8f08:
@@ -2860,6 +2916,10 @@ x86_decode(
         }
         break;
 
+    case simd_scalar_fp:
+        op_bytes = 4 << (ctxt->opcode & 1);
+        break;
+
     default:
         op_bytes = 0;
         break;
@@ -5928,6 +5988,18 @@ x86_emulate(
     case X86EMUL_OPC_VEX_66(0x0f38, 0x1c): /* vpabsb {x,y}mm/mem,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x1d): /* vpabsw {x,y}mm/mem,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x1e): /* vpabsd {x,y}mm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x28): /* vpmuldq 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x29): /* vpcmpeqq 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x2b): /* vpackusdw 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x38): /* vpminsb 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x39): /* vpminsd 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x3a): /* vpminub 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x3b): /* vpminud 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x3c): /* vpmaxsb 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x3d): /* vpmaxsd 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x3e): /* vpmaxub 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x3f): /* vpmaxud 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x40): /* vpmulld 
{x,y}mm/mem,{x,y}mm,{x,y}mm */
             if ( !vex.l )
                 goto simd_0f_avx;
             host_and_vcpu_must_have(avx2);
@@ -5940,6 +6012,10 @@ x86_emulate(
         get_fpu(X86EMUL_FPU_mmx, &fic);
         goto simd_0f_common;
 
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x41): /* vphminposuw xmm/m128,xmm,xmm */
+        generate_exception_if(vex.l, EXC_UD);
+        goto simd_0f_avx;
+
     CASE_SIMD_PACKED_INT(0x0f, 0x6e):    /* mov{d,q} r/m,{,x}mm */
     case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
     CASE_SIMD_PACKED_INT(0x0f, 0x7e):    /* mov{d,q} {,x}mm,r/m */
@@ -6023,11 +6099,21 @@ x86_emulate(
     simd_0f_int_imm8:
         if ( vex.opcx != vex_none )
         {
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0e): /* vpblendw 
$imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0f): /* vpalignr 
$imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x42): /* vmpsadbw 
$imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
             if ( vex.l )
                 host_and_vcpu_must_have(avx2);
             else
             {
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x08): /* vroundps 
$imm8,{x,y}mm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x09): /* vroundpd 
$imm8,{x,y}mm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0a): /* vroundss 
$imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0b): /* vroundsd 
$imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0c): /* vblendps 
$imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x0d): /* vblendpd 
$imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x40): /* vdpps 
$imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x41): /* vdppd 
$imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
     simd_0f_imm8_avx:
                 host_and_vcpu_must_have(avx);
             }
@@ -6923,7 +7009,10 @@ x86_emulate(
     case X86EMUL_OPC_66(0x0f38, 0x1e): /* pabsd xmm/m128,xmm */
         host_and_vcpu_must_have(ssse3);
         if ( vex.pfx )
+        {
+    simd_0f38_common:
             get_fpu(X86EMUL_FPU_xmm, &fic);
+        }
         else
         {
             host_and_vcpu_must_have(mmx);
@@ -6943,6 +7032,96 @@ x86_emulate(
         fic.insn_bytes = PFX_BYTES + 3;
         break;
 
+    case X86EMUL_OPC_66(0x0f38, 0x20): /* pmovsxbw xmm/m64,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x21): /* pmovsxbd xmm/m32,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x22): /* pmovsxbq xmm/m16,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x23): /* pmovsxwd xmm/m64,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x24): /* pmovsxwq xmm/m32,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x25): /* pmovsxdq xmm/m64,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x30): /* pmovzxbw xmm/m64,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x31): /* pmovzxbd xmm/m32,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x32): /* pmovzxbq xmm/m16,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x33): /* pmovzxwd xmm/m64,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x34): /* pmovzxwq xmm/m32,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x35): /* pmovzxdq xmm/m64,xmm */
+        op_bytes = 16 >> pmov_convert_delta[b & 7];
+        /* fall through */
+    case X86EMUL_OPC_66(0x0f38, 0x10): /* pblendvb XMM0,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x14): /* blendvps XMM0,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x15): /* blendvpd XMM0,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x28): /* pmuldq xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x29): /* pcmpeqq xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x2b): /* packusdw xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x38): /* pminsb xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x39): /* pminsd xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x3a): /* pminub xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x3b): /* pminud xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x3c): /* pmaxsb xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x3d): /* pmaxsd xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x3e): /* pmaxub xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x3f): /* pmaxud xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x40): /* pmulld xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f38, 0x41): /* phminposuw xmm/m128,xmm */
+        host_and_vcpu_must_have(sse4_1);
+        goto simd_0f38_common;
+
+    case X86EMUL_OPC_VEX(0x0f38, 0x17):    /* ptest xmm/m128,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x17): /* vptest {x,y}mm/mem,{x,y}mm */
+        if ( vex.opcx == vex_none )
+        {
+            host_and_vcpu_must_have(sse4_1);
+            get_fpu(X86EMUL_FPU_xmm, &fic);
+        }
+        else
+        {
+            generate_exception_if(vex.reg != 0xf, EXC_UD);
+            host_and_vcpu_must_have(avx);
+            get_fpu(X86EMUL_FPU_ymm, &fic);
+        }
+
+        opc = init_prefixes(stub);
+        if ( vex.opcx == vex_none )
+            opc[0] = 0x38;
+        opc[vex.opcx == vex_none] = b;
+        opc[1 + (vex.opcx == vex_none)] = modrm;
+        if ( ea.type == OP_MEM )
+        {
+            rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16 << vex.l, ctxt);
+            if ( rc != X86EMUL_OKAY )
+                goto done;
+
+            /* Convert memory operand to (%rAX). */
+            rex_prefix &= ~REX_B;
+            vex.b = 1;
+            opc[1 + (vex.opcx == vex_none)] &= 0x38;
+        }
+        fic.insn_bytes = PFX_BYTES + 2 + (vex.opcx == vex_none);
+        opc[2 + (vex.opcx == vex_none)] = 0xc3;
+
+        emulate_stub("+m" (*mmvalp), "a" (mmvalp));
+
+        put_stub(stub);
+        put_fpu(&fic);
+
+        state->simd_size = simd_none;
+        dst.type = OP_NONE;
+        break;
+
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x23): /* vpmovsxwd xmm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x24): /* vpmovsxwq xmm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x25): /* vpmovsxdq xmm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x30): /* vpmovzxbw xmm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x31): /* vpmovzxbd xmm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x32): /* vpmovzxbq xmm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x33): /* vpmovzxwd xmm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x34): /* vpmovzxwq xmm/mem,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x35): /* vpmovzxdq xmm/mem,{x,y}mm */
+        op_bytes = 16 >> (pmov_convert_delta[b & 7] - vex.l);
+        goto simd_0f_int;
+
     case X86EMUL_OPC_66(0x0f38, 0x2a):     /* movntdqa m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa mem,{x,y}mm */
         generate_exception_if(ea.type != OP_MEM, EXC_UD);
@@ -7128,7 +7307,10 @@ x86_emulate(
     case X86EMUL_OPC_66(0x0f3a, 0x0f): /* palignr $imm8,xmm/m128,xmm */
         host_and_vcpu_must_have(ssse3);
         if ( vex.pfx )
+        {
+    simd_0f3a_common:
             get_fpu(X86EMUL_FPU_xmm, &fic);
+        }
         else
         {
             host_and_vcpu_must_have(mmx);
@@ -7149,6 +7331,101 @@ x86_emulate(
         fic.insn_bytes = PFX_BYTES + 4;
         break;
 
+    case X86EMUL_OPC_66(0x0f3a, 0x08): /* roundps $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x09): /* roundpd $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x0a): /* roundss $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x0b): /* roundsd $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x0c): /* blendps $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x0d): /* blendpd $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x0e): /* pblendw $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x40): /* dpps $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x41): /* dppd $imm8,xmm/m128,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x42): /* mpsadbw $imm8,xmm/m128,xmm */
+        host_and_vcpu_must_have(sse4_1);
+        goto simd_0f3a_common;
+
+    case X86EMUL_OPC_66(0x0f3a, 0x14): /* pextrb $imm8,xmm,r/m */
+    case X86EMUL_OPC_66(0x0f3a, 0x15): /* pextrw $imm8,xmm,r/m */
+    case X86EMUL_OPC_66(0x0f3a, 0x16): /* pextr{d,q} $imm8,xmm,r/m */
+    case X86EMUL_OPC_66(0x0f3a, 0x17): /* extractps $imm8,xmm,r/m */
+        host_and_vcpu_must_have(sse4_1);
+        get_fpu(X86EMUL_FPU_xmm, &fic);
+
+        opc = init_prefixes(stub);
+        opc[0] = 0x3a;
+        fic.insn_bytes = PFX_BYTES + 3;
+    pextr:
+        opc[fic.insn_bytes - PFX_BYTES - 2] = b;
+        /* Convert memory/GPR operand to (%rAX). */
+        rex_prefix &= ~REX_B;
+        vex.b = 1;
+        if ( !mode_64bit() )
+            vex.w = 0;
+        opc[fic.insn_bytes - PFX_BYTES - 1] = modrm & 0x38;
+        opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
+
+        copy_REX_VEX(opc, rex_prefix, vex);
+        invoke_stub("", "", "=m" (dst.val) : "a" (&dst.val));
+
+        put_stub(stub);
+        put_fpu(&fic);
+
+        dst.bytes = dst.type == OP_REG || b == 0x17 ? 4 : 1 << (b & 3);
+        if ( b == 0x16 && (rex_prefix & REX_W) )
+            dst.bytes = 8;
+        break;
+
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x14): /* vpextrb $imm8,xmm,r/m */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x15): /* vpextrw $imm8,xmm,r/m */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x16): /* vpextr{d,q} $imm8,xmm,r/m */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */
+        generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
+        host_and_vcpu_must_have(avx);
+        get_fpu(X86EMUL_FPU_ymm, &fic);
+        opc = init_prefixes(stub);
+        fic.insn_bytes = PFX_BYTES + 2;
+        goto pextr;
+
+    case X86EMUL_OPC_66(0x0f3a, 0x20): /* pinsrb $imm8,r32/m8,xmm */
+    case X86EMUL_OPC_66(0x0f3a, 0x22): /* pinsr{d,q} $imm8,r/m,xmm */
+        host_and_vcpu_must_have(sse4_1);
+        get_fpu(X86EMUL_FPU_xmm, &fic);
+        memcpy(mmvalp, &src.val, op_bytes);
+        ea.type = OP_MEM;
+        op_bytes = src.bytes;
+        d = SrcMem16; /* Fake for the common SIMD code below. */
+        state->simd_size = simd_other;
+        goto simd_0f3a_common;
+
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x20): /* vpinsrb $imm8,r32/m8,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x22): /* vpinsr{d,q} $imm8,r/m,xmm,xmm */
+        generate_exception_if(vex.l, EXC_UD);
+        memcpy(mmvalp, &src.val, op_bytes);
+        ea.type = OP_MEM;
+        op_bytes = src.bytes;
+        d = SrcMem16; /* Fake for the common SIMD code below. */
+        state->simd_size = simd_other;
+        goto simd_0f_int_imm8;
+
+    case X86EMUL_OPC_66(0x0f3a, 0x21): /* insertps $imm8,xmm/m32,xmm */
+        host_and_vcpu_must_have(sse4_1);
+        op_bytes = 4;
+        goto simd_0f3a_common;
+
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x21): /* vinsertps $imm8,xmm/m128,xmm,xmm 
*/
+        op_bytes = 4;
+        generate_exception_if(vex.l, EXC_UD);
+        goto simd_0f_imm8_avx;
+
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x4a): /* vblendvps 
{x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x4b): /* vblendvpd 
{x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+        generate_exception_if(vex.w, EXC_UD);
+        goto simd_0f_imm8_avx;
+
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x4c): /* vpblendvb 
{x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+        generate_exception_if(vex.w, EXC_UD);
+        goto simd_0f_int_imm8;
+
     case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
         vcpu_must_have(bmi2);
         generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -40,6 +40,7 @@
 #define cpu_has_mmx            boot_cpu_has(X86_FEATURE_MMX)
 #define cpu_has_sse3           boot_cpu_has(X86_FEATURE_SSE3)
 #define cpu_has_ssse3          boot_cpu_has(X86_FEATURE_SSSE3)
+#define cpu_has_sse4_1         boot_cpu_has(X86_FEATURE_SSE4_1)
 #define cpu_has_sse4_2         boot_cpu_has(X86_FEATURE_SSE4_2)
 #define cpu_has_popcnt         boot_cpu_has(X86_FEATURE_POPCNT)
 #define cpu_has_htt            boot_cpu_has(X86_FEATURE_HTT)


Attachment: x86emul-SSE41.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.