[Xen-devel] [PATCH 1/6] x86/xsaves: enable xsaves/xrstors for pv guest



This patch emulates the xsaves/xrstors instructions and
accesses to the XSS MSR.

The xsaves/xrstors instructions and XSS MSR accesses can
only be executed in ring 0, so emulation is needed when a
PV guest uses them.
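
For reference (not part of the patch): XSAVES is encoded as 0f c7 /5
and XRSTORS as 0f c7 /3, so with the memory operand in (%rdi) the ModRM
byte following the fetched 0xc7 opcode byte is 0x2f and 0x1f
respectively, which is what the traps.c emulation below matches on.
A minimal sketch of that decode (the helper name is illustrative only):

    #include <stdint.h>

    /* ModRM layout: mod[7:6] reg[5:3] rm[2:0]; the reg field carries the
     * /digit of the opcode: 5 selects XSAVES, 3 selects XRSTORS. */
    static inline unsigned int modrm_reg(uint8_t modrm)
    {
        return (modrm >> 3) & 7;
    }

    /* modrm_reg(0x2f) == 5 (XSAVES), modrm_reg(0x1f) == 3 (XRSTORS);
     * in both bytes rm == 7, i.e. the memory operand is (%rdi). */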

Signed-off-by: Shuai Ruan <shuai.ruan@xxxxxxxxx>
---
 xen/arch/x86/domain.c           |  3 ++
 xen/arch/x86/traps.c            | 85 +++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/x86_64/mm.c        | 52 +++++++++++++++++++++++++
 xen/arch/x86/xstate.c           | 39 +++++++++++++++++++
 xen/include/asm-x86/domain.h    |  1 +
 xen/include/asm-x86/mm.h        |  1 +
 xen/include/asm-x86/msr-index.h |  2 +
 xen/include/asm-x86/xstate.h    |  3 ++
 8 files changed, 186 insertions(+)
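
A note on the traps.c hunk below: the compact save area the guest
passes in (%rdi) may straddle a page boundary; in that case the two
underlying frames are looked up separately (do_page_walk_mfn()) and
mapped contiguously with __vmap() before xsaves/xrstors is run on the
host-side mapping. A minimal, self-contained sketch of the straddle
check (the standalone constants and helper name are illustrative only,
not part of the patch):

    #include <stdbool.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    /* True if [va, va + size) does not fit in the page containing va,
     * e.g. page offset 0xf80 plus a 0x340-byte area crosses into the
     * next page (0xf80 + 0x340 = 0x12c0 > 0x1000). */
    static bool area_crosses_page(unsigned long va, unsigned int size)
    {
        return (va & ~PAGE_MASK) + size > PAGE_SIZE;
    }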

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index a8fe046..66f8231 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -426,6 +426,7 @@ int vcpu_initialise(struct vcpu *v)
 
     /* By default, do not emulate */
     v->arch.vm_event.emulate_flags = 0;
+    v->arch.msr_ia32_xss = 0;
 
     rc = mapcache_vcpu_init(v);
     if ( rc )
@@ -1494,6 +1495,8 @@ static void __context_switch(void)
             if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
                 BUG();
         }
+        if ( cpu_has_xsaves )
+            wrmsr_safe(MSR_IA32_XSS, n->arch.msr_ia32_xss);
         vcpu_restore_fpu_eager(n);
         n->arch.ctxt_switch_to(n);
     }
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index ac62f20..227670b 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2346,6 +2346,80 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
         }
         break;
 
+    case 0xc7:
+    {
+        void *xsave_addr;
+        int not_page_aligned = 0;
+        u32 guest_xsaves_size = xstate_ctxt_size_compact(v->arch.xcr0);
+
+        switch ( insn_fetch(u8, code_base, eip, code_limit) )
+        {
+            case 0x2f:/* XSAVES */
+            {
+                if ( (regs->edi & ~PAGE_MASK) + guest_xsaves_size > PAGE_SIZE )
+                {
+                    mfn_t mfn_list[2];
+                    void *va;
+
+                    not_page_aligned = 1;
+                    mfn_list[0] = _mfn(do_page_walk_mfn(v, regs->edi));
+                    mfn_list[1] = _mfn(do_page_walk_mfn(v,
+                                       PAGE_ALIGN(regs->edi)));
+                    va = __vmap(mfn_list, 1, 2, PAGE_SIZE, PAGE_HYPERVISOR);
+                    ASSERT(((unsigned long) va & ~PAGE_MASK) == 0);
+                    xsave_addr = (void *)((unsigned long)va +
+                                         (regs->edi & ~PAGE_MASK));
+                }
+                else
+                    xsave_addr = do_page_walk(v, regs->edi);
+
+                if ( !xsave_addr )
+                    goto fail;
+
+                xsaves(regs->eax, regs->edx, xsave_addr);
+
+                if ( not_page_aligned )
+                    vunmap((void *)((unsigned long)xsave_addr & PAGE_MASK));
+                else
+                    unmap_domain_page(xsave_addr);
+                break;
+            }
+            case 0x1f:/* XRSTORS */
+            {
+                if( (regs->edi & ~PAGE_MASK) + guest_xsaves_size > PAGE_SIZE )
+                {
+                    mfn_t mfn_list[2];
+                    void *va;
+
+                    not_page_aligned = 1;
+                    mfn_list[0] = _mfn(do_page_walk_mfn(v, regs->edi));
+                    mfn_list[1] = _mfn(do_page_walk_mfn(v,
+                                       PAGE_ALIGN(regs->edi)));
+                    va = __vmap(mfn_list, 1, 2, PAGE_SIZE, PAGE_HYPERVISOR);
+                    ASSERT(((unsigned long) va & ~PAGE_MASK) == 0);
+                    xsave_addr = (void *)((unsigned long)va +
+                                         (regs->edi & ~PAGE_MASK));
+                }
+                else
+                    xsave_addr = do_page_walk(v, regs->edi);
+
+                if ( !xsave_addr )
+                    goto fail;
+
+                xrstors(regs->eax, regs->edx, xsave_addr);
+
+                if ( not_page_aligned )
+                    vunmap((void *)((unsigned long)xsave_addr & PAGE_MASK));
+                else
+                    unmap_domain_page(xsave_addr);
+                break;
+            }
+            default:
+                goto fail;
+        }
+        break;
+    }
+
     case 0x06: /* CLTS */
         (void)do_fpu_taskswitch(0);
         break;
@@ -2638,6 +2712,12 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
                 wrmsrl(regs->_ecx, msr_content);
             break;
 
+        case MSR_IA32_XSS:
+            if ( wrmsr_safe(regs->ecx, msr_content) != 0 )
+                goto fail;
+            v->arch.msr_ia32_xss = msr_content;
+            break;
+
         default:
             if ( wrmsr_hypervisor_regs(regs->ecx, msr_content) == 1 )
                 break;
@@ -2740,6 +2820,11 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
             regs->edx = 0;
             break;
 
+        case MSR_IA32_XSS:
+            regs->eax = v->arch.msr_ia32_xss;
+            regs->edx = v->arch.msr_ia32_xss >> 32;
+            break;
+
         default:
             if ( rdmsr_hypervisor_regs(regs->ecx, &val) )
                 goto rdmsr_writeback;
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index 3ef4618..f64aa08 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -48,6 +48,58 @@ l2_pgentry_t __section(".bss.page_aligned") l2_bootmap[L2_PAGETABLE_ENTRIES];
 
 l2_pgentry_t *compat_idle_pg_table_l2;
 
+unsigned long do_page_walk_mfn(struct vcpu *v, unsigned long addr)
+{
+    unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
+    l4_pgentry_t l4e, *l4t;
+    l3_pgentry_t l3e, *l3t;
+    l2_pgentry_t l2e, *l2t;
+    l1_pgentry_t l1e, *l1t;
+
+    if ( !is_pv_vcpu(v) || !is_canonical_address(addr) )
+        return 0;
+
+    l4t = map_domain_page(mfn);
+    l4e = l4t[l4_table_offset(addr)];
+    unmap_domain_page(l4t);
+    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
+        return 0;
+
+    l3t = map_l3t_from_l4e(l4e);
+    l3e = l3t[l3_table_offset(addr)];
+    unmap_domain_page(l3t);
+    mfn = l3e_get_pfn(l3e);
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
+        return 0;
+    if ( (l3e_get_flags(l3e) & _PAGE_PSE) )
+    {
+        mfn += PFN_DOWN(addr & ((1UL << L3_PAGETABLE_SHIFT) - 1));
+        goto ret;
+    }
+
+    l2t = map_domain_page(mfn);
+    l2e = l2t[l2_table_offset(addr)];
+    unmap_domain_page(l2t);
+    mfn = l2e_get_pfn(l2e);
+    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
+        return 0;
+    if ( (l2e_get_flags(l2e) & _PAGE_PSE) )
+    {
+        mfn += PFN_DOWN(addr & ((1UL << L2_PAGETABLE_SHIFT) - 1));
+        goto ret;
+    }
+
+    l1t = map_domain_page(mfn);
+    l1e = l1t[l1_table_offset(addr)];
+    unmap_domain_page(l1t);
+    mfn = l1e_get_pfn(l1e);
+    if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
+        return 0;
+
+ ret:
+    return mfn;
+}
+
 void *do_page_walk(struct vcpu *v, unsigned long addr)
 {
     unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index d5f5e3b..e34eda3 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -65,6 +65,31 @@ uint64_t get_xcr0(void)
     return this_cpu(xcr0);
 }
 
+void xsaves(uint32_t lmask, uint32_t hmask, struct xsave_struct *ptr)
+{
+    asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
+                    : "=m" (*ptr)
+                    : "a" (lmask), "d" (hmask), "D" (ptr) );
+}
+
+void xrstors(uint32_t lmask, uint32_t hmask, struct xsave_struct *ptr)
+{
+    asm volatile ( "1: .byte 0x48,0x0f,0xc7,0x1f\n"
+                   ".section .fixup,\"ax\"      \n"
+                   "2: mov %5,%%ecx             \n"
+                   "   xor %1,%1                \n"
+                   "   rep stosb                \n"
+                   "   lea %2,%0                \n"
+                   "   mov %3,%1                \n"
+                   "   jmp 1b                   \n"
+                   ".previous                   \n"
+                   _ASM_EXTABLE(1b, 2b)
+                   : "+&D" (ptr), "+&a" (lmask)
+                   : "m" (*ptr), "g" (lmask), "d" (hmask),
+                     "m" (xsave_cntxt_size)
+                   : "ecx" );
+}
+
 void xsave(struct vcpu *v, uint64_t mask)
 {
     struct xsave_struct *ptr = v->arch.xsave_area;
@@ -268,6 +293,20 @@ static unsigned int _xstate_ctxt_size(u64 xcr0)
     return ebx;
 }
 
+unsigned int xstate_ctxt_size_compact(u64 xcr0)
+{
+    u64 act_xcr0 = get_xcr0();
+    u32 eax, ebx = 0, ecx, edx;
+    bool_t ok = set_xcr0(xcr0);
+
+    ASSERT(ok);
+    cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+    ok = set_xcr0(act_xcr0);
+    ASSERT(ok);
+
+    return ebx;
+}
+
 /* Fastpath for common xstate size requests, avoiding reloads of xcr0. */
 unsigned int xstate_ctxt_size(u64 xcr0)
 {
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 96bde65..bcea9d4 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -473,6 +473,7 @@ struct arch_vcpu
      */
     struct xsave_struct *xsave_area;
     uint64_t xcr0;
+    u64 msr_ia32_xss;
     /* Accumulated eXtended features mask for using XSAVE/XRESTORE by Xen
      * itself, as we can never know whether guest OS depends on content
      * preservation whenever guest OS clears one feature flag (for example,
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 8595c38..94a590e 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -524,6 +524,7 @@ void make_cr3(struct vcpu *v, unsigned long mfn);
 void update_cr3(struct vcpu *v);
 int vcpu_destroy_pagetables(struct vcpu *);
 struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code);
+unsigned long do_page_walk_mfn(struct vcpu *v, unsigned long addr);
 void *do_page_walk(struct vcpu *v, unsigned long addr);
 
 int __sync_local_execstate(void);
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index 83f2f70..9564113 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -58,6 +58,8 @@
 
 #define MSR_IA32_BNDCFGS               0x00000D90
 
+#define MSR_IA32_XSS                   0x00000da0
+
 #define MSR_MTRRfix64K_00000           0x00000250
 #define MSR_MTRRfix16K_80000           0x00000258
 #define MSR_MTRRfix16K_A0000           0x00000259
diff --git a/xen/include/asm-x86/xstate.h b/xen/include/asm-x86/xstate.h
index 4c690db..59c7156 100644
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -82,6 +82,8 @@ struct __packed __attribute__((aligned (64))) xsave_struct
 /* extended state operations */
 bool_t __must_check set_xcr0(u64 xfeatures);
 uint64_t get_xcr0(void);
+void xsaves(uint32_t lmask, uint32_t hmask, struct xsave_struct *ptr);
+void xrstors(uint32_t lmask, uint32_t hmask, struct xsave_struct *ptr);
 void xsave(struct vcpu *v, uint64_t mask);
 void xrstor(struct vcpu *v, uint64_t mask);
 bool_t xsave_enabled(const struct vcpu *v);
@@ -92,6 +94,7 @@ int __must_check handle_xsetbv(u32 index, u64 new_bv);
 void xstate_free_save_area(struct vcpu *v);
 int xstate_alloc_save_area(struct vcpu *v);
 void xstate_init(bool_t bsp);
+unsigned int xstate_ctxt_size_compact(u64 xcr0);
 unsigned int xstate_ctxt_size(u64 xcr0);
 
 #endif /* __ASM_XSTATE_H */
-- 
1.9.1

