[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/2] x86/emulator: generalize movq emulation (SSE2 and AVX variants)


  • To: Jan Beulich <JBeulich@xxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • From: Keir Fraser <keir@xxxxxxx>
  • Date: Wed, 16 Nov 2011 16:25:59 +0000
  • Cc:
  • Delivery-date: Wed, 16 Nov 2011 08:27:43 -0800
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>
  • Thread-index: AcykfGx+PpuAFAnEI0mY+7BYvqEBYA==
  • Thread-topic: [Xen-devel] [PATCH 2/2] x86/emulator: generalize movq emulation (SSE2 and AVX variants)

On 16/11/2011 14:00, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:

> Extend the existing movq emulation to also support its SSE2 and AVX
> variants, the latter implying the addition of VEX decoding. Fold the
> read and write cases (as most of the logic is identical), and add
> movntq and variants (as they're very similar).
> 
> Extend the testing code to also exercise these instructions.

I checked in your other patches, although I split them up and revised them
in some cases.

This one is broadly okay too, but:

 1. Don't import vm86_mode(). x86_emulate already does eflags&EFLG_VM in
some places. And that's fairly self-documenting, so just carry on with that.

 2. Don't import DEFINE_PER_CPU/this_cpu. I understand it works around a
critical issue but it's *so* nasty. I would rather define a nasty private
macro for declaring aligned space on the stack, like, for example:
   char __mmval[64], *mmval = (__mmval + 31) & ~31;
(suitably cleaned up, macroised, and made compilable of course ;-)

 3. There's a XXX'ed chunk of code in the middle of the patch. No
explanation. Remove it, or comment it, or something.

Note that I changed the vcpu_must_have stuff when I checked it in, so those
bits will need fixup in this patch too. In particular, I don't bother
importing cpufeature.h -- the leaf/reg are already open coded with no macro
abstraction, so I see no harm in open-coding the bit number either. They
won't change and the vcpu_must_have_xxx macro name is sufficient
documentation in itself.

 -- Keir

> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> 
> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -1,3 +1,5 @@
> +#include <errno.h>
> +#include <stdbool.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -53,11 +55,84 @@ static int cmpxchg(
>      return X86EMUL_OKAY;
>  }
>  
> +static int cpuid(
> +    unsigned int *eax,
> +    unsigned int *ebx,
> +    unsigned int *ecx,
> +    unsigned int *edx,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=d" (*edx), "=b" (*ebx));
> +    return X86EMUL_OKAY;
> +}
> +
> +#define cpu_has_mmx ({ \
> +    unsigned int eax = 1, ecx = 0, edx; \
> +    cpuid(&eax, &ecx, &ecx, &edx, NULL); \
> +    (edx & (1U << 23)) != 0; \
> +})
> +
> +#define cpu_has_sse ({ \
> +    unsigned int eax = 1, ecx = 0, edx; \
> +    cpuid(&eax, &ecx, &ecx, &edx, NULL); \
> +    (edx & (1U << 25)) != 0; \
> +})
> +
> +#define cpu_has_sse2 ({ \
> +    unsigned int eax = 1, ecx = 0, edx; \
> +    cpuid(&eax, &ecx, &ecx, &edx, NULL); \
> +    (edx & (1U << 26)) != 0; \
> +})
> +
> +static inline uint64_t xgetbv(uint32_t xcr)
> +{
> +    uint64_t res;
> +
> +    asm ( ".byte 0x0f, 0x01, 0xd0" : "=A" (res) : "c" (xcr) );
> +
> +    return res;
> +}
> +
> +#define cpu_has_avx ({ \
> +    unsigned int eax = 1, ecx = 0, edx; \
> +    cpuid(&eax, &edx, &ecx, &edx, NULL); \
> +    if ( !(ecx & (1U << 27)) || (xgetbv(0) & 6) != 6 ) \
> +        ecx = 0; \
> +    (ecx & (1U << 28)) != 0; \
> +})
> +
> +int get_fpu(
> +    void (*exception_callback)(void *, struct cpu_user_regs *),
> +    void *exception_callback_arg,
> +    enum x86_emulate_fpu_type type,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    switch ( type )
> +    {
> +    case X86EMUL_FPU_fpu:
> +        break;
> +    case X86EMUL_FPU_ymm:
> +        if ( cpu_has_avx )
> +            break;
> +    case X86EMUL_FPU_xmm:
> +        if ( cpu_has_sse )
> +            break;
> +    case X86EMUL_FPU_mmx:
> +        if ( cpu_has_mmx )
> +            break;
> +    default:
> +        return X86EMUL_UNHANDLEABLE;
> +    }
> +    return X86EMUL_OKAY;
> +}
> +
>  static struct x86_emulate_ops emulops = {
>      .read       = read,
>      .insn_fetch = read,
>      .write      = write,
>      .cmpxchg    = cmpxchg,
> +    .cpuid      = cpuid,
> +    .get_fpu    = get_fpu,
>  };
>  
>  int main(int argc, char **argv)
> @@ -66,6 +141,8 @@ int main(int argc, char **argv)
>      struct cpu_user_regs regs;
>      char *instr;
>      unsigned int *res, i, j;
> +    unsigned long sp;
> +    bool stack_exec;
>      int rc;
>  #ifndef __x86_64__
>      unsigned int bcdres_native, bcdres_emul;
> @@ -85,6 +162,16 @@ int main(int argc, char **argv)
>      }
>      instr = (char *)res + 0x100;
>  
> +#ifdef __x86_64__
> +    asm ("movq %%rsp, %0" : "=g" (sp));
> +#else
> +    asm ("movl %%esp, %0" : "=g" (sp));
> +#endif
> +    stack_exec = mprotect((void *)(sp & -0x1000L) - (MMAP_SZ - 0x1000),
> +                          MMAP_SZ, PROT_READ|PROT_WRITE|PROT_EXEC) == 0;
> +    if ( !stack_exec )
> +        printf("Warning: Stack could not be made executable (%d).\n", errno);
> +
>      printf("%-40s", "Testing addl %%ecx,(%%eax)...");
>      instr[0] = 0x01; instr[1] = 0x08;
>      regs.eflags = 0x200;
> @@ -442,6 +529,108 @@ int main(int argc, char **argv)
>      printf("skipped\n");
>  #endif
>  
> +    printf("%-40s", "Testing movq %mm3,(%ecx)...");
> +    if ( stack_exec && cpu_has_mmx )
> +    {
> +        extern const unsigned char movq_to_mem[];
> +
> +        asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
> +                       ".pushsection .test, \"a\", @progbits\n"
> +                       "movq_to_mem: movq %%mm3, (%0)\n"
> +                       ".popsection" :: "c" (NULL) );
> +
> +        memcpy(instr, movq_to_mem, 15);
> +        memset(res, 0x33, 64);
> +        memset(res + 8, 0xff, 8);
> +        regs.eip    = (unsigned long)&instr[0];
> +        regs.ecx    = (unsigned long)res;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( (rc != X86EMUL_OKAY) ||
> +             memcmp(res, res + 8, 32) )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
> +    printf("%-40s", "Testing movq (%edx),%mm5...");
> +    if ( stack_exec && cpu_has_mmx )
> +    {
> +        extern const unsigned char movq_from_mem[];
> +
> +        asm volatile ( "pcmpgtb %%mm5, %%mm5\n"
> +                       ".pushsection .test, \"a\", @progbits\n"
> +                       "movq_from_mem: movq (%0), %%mm5\n"
> +                       ".popsection" :: "d" (NULL) );
> +
> +        memcpy(instr, movq_from_mem, 15);
> +        regs.eip    = (unsigned long)&instr[0];
> +        regs.ecx    = 0;
> +        regs.edx    = (unsigned long)res;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( rc != X86EMUL_OKAY )
> +            goto fail;
> +        asm ( "pcmpeqb %%mm3, %%mm3\n\t"
> +              "pcmpeqb %%mm5, %%mm3\n\t"
> +              "pmovmskb %%mm3, %0" : "=r" (rc) );
> +        if ( rc != 0xff )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
> +    printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
> +    if ( stack_exec && cpu_has_sse2 )
> +    {
> +        extern const unsigned char movdqu_to_mem[];
> +
> +        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
> +                       ".pushsection .test, \"a\", @progbits\n"
> +                       "movdqu_to_mem: movdqu %%xmm2, (%0)\n"
> +                       ".popsection" :: "c" (NULL) );
> +
> +        memcpy(instr, movdqu_to_mem, 15);
> +        memset(res, 0x55, 64);
> +        memset(res + 8, 0xff, 16);
> +        regs.eip    = (unsigned long)&instr[0];
> +        regs.ecx    = (unsigned long)res;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( (rc != X86EMUL_OKAY) ||
> +             memcmp(res, res + 8, 32) )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
> +    printf("%-40s", "Testing movdqu (%edx),%xmm4...");
> +    if ( stack_exec && cpu_has_sse2 )
> +    {
> +        extern const unsigned char movdqu_from_mem[];
> +
> +        asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n"
> +                       ".pushsection .test, \"a\", @progbits\n"
> +                       "movdqu_from_mem: movdqu (%0), %%xmm4\n"
> +                       ".popsection" :: "d" (NULL) );
> +
> +        memcpy(instr, movdqu_from_mem, 15);
> +        regs.eip    = (unsigned long)&instr[0];
> +        regs.ecx    = 0;
> +        regs.edx    = (unsigned long)res;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( rc != X86EMUL_OKAY )
> +            goto fail;
> +        asm ( "pcmpeqb %%xmm2, %%xmm2\n\t"
> +              "pcmpeqb %%xmm4, %%xmm2\n\t"
> +              "pmovmskb %%xmm2, %0" : "=r" (rc) );
> +        if ( rc != 0xffff )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
>      for ( j = 1; j <= 2; j++ )
>      {
>  #if defined(__i386__)
> --- a/tools/tests/x86_emulator/x86_emulate.c
> +++ b/tools/tests/x86_emulator/x86_emulate.c
> @@ -9,5 +9,10 @@ typedef bool bool_t;
>  
>  #define BUG() abort()
>  
> +#define DEFINE_PER_CPU(type, var) type this_cpu_##var
> +#define this_cpu(var) this_cpu_##var
> +
> +#define vm86_mode(regs) 0
> +
>  #include "x86_emulate/x86_emulate.h"
>  #include "x86_emulate/x86_emulate.c"
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -16,6 +16,7 @@
>  #include <xen/paging.h>
>  #include <xen/trace.h>
>  #include <asm/event.h>
> +#include <asm/xstate.h>
>  #include <asm/hvm/emulate.h>
>  #include <asm/hvm/hvm.h>
>  #include <asm/hvm/trace.h>
> @@ -905,6 +906,20 @@ static int hvmemul_get_fpu(
>          if ( !cpu_has_mmx )
>              return X86EMUL_UNHANDLEABLE;
>          break;
> +    case X86EMUL_FPU_xmm:
> +        if ( !cpu_has_xmm ||
> +             (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_EM) ||
> +             !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSFXSR) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
> +    case X86EMUL_FPU_ymm:
> +        if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
> +             vm86_mode(ctxt->regs) ||
> +             !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE) ||
> +             !(curr->arch.xcr0 & XSTATE_SSE) ||
> +             !(curr->arch.xcr0 & XSTATE_YMM) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
>      default:
>          return X86EMUL_UNHANDLEABLE;
>      }
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -253,6 +253,47 @@ static uint8_t twobyte_table[256] = {
>      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
>  };
>  
> +#define REX_PREFIX 0x40
> +#define REX_B 0x01
> +#define REX_X 0x02
> +#define REX_R 0x04
> +#define REX_W 0x08
> +
> +#define vex_none 0
> +
> +enum vex_opcx {
> +    vex_0f = vex_none + 1,
> +    vex_0f38,
> +    vex_0f3a,
> +};
> +
> +enum vex_pfx {
> +    vex_66 = vex_none + 1,
> +    vex_f3,
> +    vex_f2
> +};
> +
> +union vex {
> +    uint8_t raw[2];
> +    struct {
> +        uint8_t opcx:5;
> +        uint8_t b:1;
> +        uint8_t x:1;
> +        uint8_t r:1;
> +        uint8_t pfx:2;
> +        uint8_t l:1;
> +        uint8_t reg:4;
> +        uint8_t w:1;
> +    };
> +};
> +
> +#define copy_REX_VEX(ptr, rex, vex) do { \
> +    if ( (vex).opcx != vex_none ) \
> +        ptr[0] = 0xc4, ptr[1] = (vex).raw[0], ptr[2] = (vex).raw[1]; \
> +    else if ( mode_64bit() ) \
> +        ptr[1] = rex | REX_PREFIX; \
> +} while (0)
> +
>  /* Type, address-of, and value of an instruction's operand. */
>  struct operand {
>      enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
> @@ -281,6 +322,18 @@ struct operand {
>      };
>  };
>  
> +typedef union {
> +    uint64_t mmx;
> +    uint64_t __attribute__ ((aligned(16))) xmm[2];
> +    uint64_t __attribute__ ((aligned(32))) ymm[4];
> +} mmval_t;
> +
> +/*
> + * While alignment gets specified above, this doesn't get honored by the
> + * compiler for automatic variables. Thus use a per-CPU variable instead.
> + */
> +static DEFINE_PER_CPU(mmval_t, mmval);
> +
>  /* MSRs. */
>  #define MSR_TSC          0x00000010
>  #define MSR_SYSENTER_CS  0x00000174
> @@ -972,9 +1025,12 @@ static bool_t vcpu_has(
>      generate_exception_if(!vcpu_has(leaf, subleaf, reg, \
>                                      X86_FEATURE_##feature % 32, \
>                                      ctxt, ops), EXC_UD, -1)
> +#define vcpu_must_have_mmx()  vcpu_must_have(1, 0, EDX, MMX)
> +#define vcpu_must_have_sse()  vcpu_must_have(1, 0, EDX, XMM)
>  #define vcpu_must_have_sse2() vcpu_must_have(1, 0, EDX, XMM2)
>  #define vcpu_must_have_sse3() vcpu_must_have(1, 0, ECX, XMM3)
>  #define vcpu_must_have_cx16() vcpu_must_have(1, 0, ECX, CX16)
> +#define vcpu_must_have_avx()  vcpu_must_have(1, 0, ECX, AVX)
>  
>  static int
>  in_realmode(
> @@ -1255,6 +1311,7 @@ x86_emulate(
>  
>      uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0;
>      uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
> +    union vex vex = {};
>      unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;
>  #define REPE_PREFIX  1
>  #define REPNE_PREFIX 2
> @@ -1287,6 +1344,7 @@ x86_emulate(
>          {
>          case 0x66: /* operand-size override */
>              op_bytes = def_op_bytes ^ 6;
> +            vex.pfx = vex_66;
>              break;
>          case 0x67: /* address-size override */
>              ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6);
> @@ -1314,9 +1372,11 @@ x86_emulate(
>              break;
>          case 0xf2: /* REPNE/REPNZ */
>              rep_prefix = REPNE_PREFIX;
> +            vex.pfx = vex_f2;
>              break;
>          case 0xf3: /* REP/REPE/REPZ */
>              rep_prefix = REPE_PREFIX;
> +            vex.pfx = vex_f3;
>              break;
>          case 0x40 ... 0x4f: /* REX */
>              if ( !mode_64bit() )
> @@ -1360,6 +1420,70 @@ x86_emulate(
>      {
>          modrm = insn_fetch_type(uint8_t);
>          modrm_mod = (modrm & 0xc0) >> 6;
> +
> +        if ( !twobyte && (b & ~1) == 0xc4 )
> +            switch ( def_ad_bytes )
> +            {
> +            default:
> +                BUG();
> +            case 2:
> +                if ( in_realmode(ctxt, ops) || vm86_mode(&_regs) )
> +                    break;
> +                /* fall through */
> +            case 4:
> +                if ( modrm_mod != 3 )
> +                    break;
> +                /* fall through */
> +            case 8:
> +                /* VEX */
> +                generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1);
> +
> +                vex.raw[0] = b;
> +                if ( b & 1 )
> +                {
> +                    vex.raw[1] = b;
> +                    vex.opcx = vex_0f;
> +                    vex.x = 1;
> +                    vex.b = 1;
> +                    vex.w = 0;
> +                }
> +                else
> +                {
> +                    vex.raw[1] = insn_fetch_type(uint8_t);
> +                    if ( mode_64bit() )
> +                    {
> +                        if ( !vex.b )
> +                            rex_prefix |= REX_B;
> +                        if ( !vex.x )
> +                            rex_prefix |= REX_X;
> +                        if ( vex.w )
> +                        {
> +                            rex_prefix |= REX_W;
> +                            op_bytes = 8;
> +                        }
> +                    }
> +                }
> +                vex.reg ^= 0xf;
> +                if ( !mode_64bit() )
> +                    vex.reg &= 0x7;
> +                else if ( !vex.r )
> +                    rex_prefix |= REX_R;
> +
> +                fail_if(vex.opcx != vex_0f);
> +                twobyte = 1;
> +                b = insn_fetch_type(uint8_t);
> +                d = twobyte_table[b];
> +
> +                /* Unrecognised? */
> +                if ( d == 0 )
> +                    goto cannot_emulate;
> +
> +                modrm = insn_fetch_type(uint8_t);
> +                modrm_mod = (modrm & 0xc0) >> 6;
> +
> +                break;
> +            }
> +
>          modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3);
>          modrm_rm  = modrm & 0x07;
>  
> @@ -3917,44 +4041,77 @@ x86_emulate(
>          break;
>      }
>  
> -    case 0x6f: /* movq mm/m64,mm */ {
> -        uint8_t stub[] = { 0x0f, 0x6f, modrm, 0xc3 };
> +    case 0x6f: /* movq mm/m64,mm */
> +               /* {,v}movdq{a,u} xmm/m128,xmm */
> +               /* vmovdq{a,u} ymm/m256,ymm */
> +    case 0x7f: /* movq mm,mm/m64 */
> +               /* {,v}movdq{a,u} xmm,xmm/m128 */
> +               /* vmovdq{a,u} ymm,ymm/m256 */
> +    case 0xe7: /* movntq mm,mm/m64 */
> +               /* {,v}movntdq xmm,xmm/m128 */
> +               /* vmovntdq{a,u} ymm,ymm/m256 */
> +    {
> +        uint8_t stub[] = { 0x3e, 0x3e, 0x0f, b, modrm, 0xc3 };
>          struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
> -        uint64_t val;
> -        if ( ea.type == OP_MEM )
> +
> +        if ( vex.opcx == vex_none )
>          {
> -            unsigned long lval, hval;
> -            if ( (rc = read_ulong(ea.mem.seg, ea.mem.off+0,
> -                                  &lval, 4, ctxt, ops)) ||
> -                 (rc = read_ulong(ea.mem.seg, ea.mem.off+4,
> -                                  &hval, 4, ctxt, ops)) )
> -                goto done;
> -            val = ((uint64_t)hval << 32) | (uint32_t)lval;
> -            stub[2] = modrm & 0x38; /* movq (%eax),%mmN */
> +            switch ( vex.pfx )
> +            {
> +            case vex_f3:
> +                fail_if(b == 0xe7);
> +                /* fall through */
> +            case vex_66:
> +                vcpu_must_have_sse2();
> +                stub[0] = 0x66; /* movdqa */
> +                get_fpu(X86EMUL_FPU_xmm, &fic);
> +                ea.bytes = 16;
> +                break;
> +            case vex_none:
> +                if ( b != 0xe7 )
> +                    vcpu_must_have_mmx();
> +                else
> +                    vcpu_must_have_sse();
> +                get_fpu(X86EMUL_FPU_mmx, &fic);
> +                ea.bytes = 8;
> +                break;
> +            default:
> +                goto cannot_emulate;
> +            }
> +        }
> +        else
> +        {
> +            fail_if(vex.opcx != vex_0f || vex.reg ||
> +                    (vex.pfx != vex_66 && (vex.pfx != vex_f3 || b == 0xe7)));
> +            vcpu_must_have_avx();
> +            get_fpu(X86EMUL_FPU_ymm, &fic);
> +            ea.bytes = 16 << vex.l;
>          }
> -        get_fpu(X86EMUL_FPU_mmx, &fic);
> -        asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" );
> -        put_fpu(&fic);
> -        break;
> -    }
> -
> -    case 0x7f: /* movq mm,mm/m64 */ {
> -        uint8_t stub[] = { 0x0f, 0x7f, modrm, 0xc3 };
> -        struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
> -        uint64_t val;
> -        if ( ea.type == OP_MEM )
> -            stub[2] = modrm & 0x38; /* movq %mmN,(%eax) */
> -        get_fpu(X86EMUL_FPU_mmx, &fic);
> -        asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" );
> -        put_fpu(&fic);
>          if ( ea.type == OP_MEM )
>          {
> -            unsigned long lval = (uint32_t)val, hval = (uint32_t)(val >> 32);
> -            if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0, &lval, 4, ctxt))
> ||
> -                 (rc = ops->write(ea.mem.seg, ea.mem.off+4, &hval, 4, ctxt))
> )
> -                goto done;
> +            /* XXX
> +            generate_exception_if(vex.pfx == vex_66 &&
> +                                  (ops->ea(ea.mem.seg, ea.mem.off)
> +                                   & (ea.bytes - 1)), EXC_GP, 0); */
> +            if ( b == 0x6f )
> +                rc = ops->read(ea.mem.seg, ea.mem.off+0, &this_cpu(mmval),
> +                               ea.bytes, ctxt);
> +            /* convert memory operand to (%rAX) */
> +            rex_prefix &= ~REX_B;
> +            vex.b = 1;
> +            stub[4] &= 0x38;
> +        }
> +        if ( !rc )
> +        {
> +           copy_REX_VEX(stub, rex_prefix, vex);
> +           asm volatile ( "call *%0" : : "r" (stub), "a" (&this_cpu(mmval))
> +                                     : "memory" );
>          }
> -        break;
> +        put_fpu(&fic);
> +        if ( b != 0x6f && ea.type == OP_MEM )
> +            rc = ops->write(ea.mem.seg, ea.mem.off, &this_cpu(mmval),
> +                            ea.bytes, ctxt);
> +        goto done;
>      }
>  
>      case 0x80 ... 0x8f: /* jcc (near) */ {
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -99,7 +99,9 @@ struct segment_register {
>  /* FPU sub-types which may be requested via ->get_fpu(). */
>  enum x86_emulate_fpu_type {
>      X86EMUL_FPU_fpu, /* Standard FPU coprocessor instruction set */
> -    X86EMUL_FPU_mmx  /* MMX instruction set (%mm0-%mm7) */
> +    X86EMUL_FPU_mmx, /* MMX instruction set (%mm0-%mm7) */
> +    X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */
> +    X86EMUL_FPU_ymm  /* AVX/XOP instruction set (%ymm0-%ymm7/15) */
>  };
>  
>  /*
> --- a/xen/arch/x86/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate.c
> @@ -10,9 +10,11 @@
>   */
>  
>  #include <asm/cpufeature.h>
> +#include <asm/processor.h>
>  #include <asm/x86_emulate.h>
>  
>  /* Avoid namespace pollution. */
>  #undef cmpxchg
> +#undef cpuid
>  
>  #include "x86_emulate/x86_emulate.c"
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -218,7 +218,7 @@
>  #define cpu_has_x2apic          boot_cpu_has(X86_FEATURE_X2APIC)
>  
>  #define cpu_has_xsave           boot_cpu_has(X86_FEATURE_XSAVE)
> -
> +#define cpu_has_avx             boot_cpu_has(X86_FEATURE_AVX)
>  #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
>  
>  #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.