[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [V11 1/3] x86/xsaves: enable xsaves/xrstors/xsavec in xen



This patch uses xsaves/xrstors/xsavec instead of xsaveopt/xrstor
to perform the xsave_area switching so that xen itself
can benefit from them when available.

For xsaves/xrstors/xsavec only use compact format. Add format conversion
support when perform guest os migration. Also, pv guest will not support
xsaves/xrstors.

Signed-off-by: Shuai Ruan <shuai.ruan@xxxxxxxxxxxxxxx>
---
 xen/arch/x86/domain.c             |   8 ++
 xen/arch/x86/domctl.c             |  30 ++++-
 xen/arch/x86/hvm/hvm.c            |  18 ++-
 xen/arch/x86/i387.c               |   4 +
 xen/arch/x86/traps.c              |   6 +-
 xen/arch/x86/xstate.c             | 265 +++++++++++++++++++++++++++++++++-----
 xen/include/asm-x86/alternative.h |   3 +
 xen/include/asm-x86/xstate.h      |   2 +
 8 files changed, 292 insertions(+), 44 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 3f9e5d2..9476869 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -910,7 +910,12 @@ int arch_set_info_guest(
     {
         memcpy(v->arch.fpu_ctxt, &c.nat->fpu_ctxt, sizeof(c.nat->fpu_ctxt));
         if ( v->arch.xsave_area )
+        {
              v->arch.xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
+             if ( cpu_has_xsaves || cpu_has_xsavec )
+                  v->arch.xsave_area->xsave_hdr.xcomp_bv = XSTATE_FP_SSE |
+                                                           
XSTATE_COMPACTION_ENABLED;
+        }
     }
 
     if ( !compat )
@@ -1594,6 +1599,9 @@ static void __context_switch(void)
 
             if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
                 BUG();
+
+            if ( cpu_has_xsaves && has_hvm_container_vcpu(n) )
+                set_msr_xss(n->arch.hvm_vcpu.msr_xss);
         }
         vcpu_restore_fpu_eager(n);
         n->arch.ctxt_switch_to(n);
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 0f6fdb9..95b0747 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -897,9 +897,29 @@ long arch_do_domctl(
                 ret = -EFAULT;
 
             offset += sizeof(v->arch.xcr0_accum);
-            if ( !ret && copy_to_guest_offset(evc->buffer, offset,
-                                              (void *)v->arch.xsave_area,
-                                              size - 2 * sizeof(uint64_t)) )
+            if ( !ret && (cpu_has_xsaves || cpu_has_xsavec) )
+            {
+                void *xsave_area;
+
+                xsave_area = xmalloc_bytes(size);
+                if ( !xsave_area )
+                {
+                    ret = -ENOMEM;
+                    vcpu_unpause(v);
+                    goto vcpuextstate_out;
+                }
+
+                expand_xsave_states(v, xsave_area,
+                                    size - 2 * sizeof(uint64_t));
+
+                if ( copy_to_guest_offset(evc->buffer, offset, xsave_area,
+                                          size - 2 * sizeof(uint64_t)) )
+                     ret = -EFAULT;
+                xfree(xsave_area);
+           }
+           else if ( !ret && copy_to_guest_offset(evc->buffer, offset,
+                                                  (void *)v->arch.xsave_area,
+                                                  size - 2 * sizeof(uint64_t)) 
)
                 ret = -EFAULT;
 
             vcpu_unpause(v);
@@ -955,8 +975,8 @@ long arch_do_domctl(
                 v->arch.xcr0_accum = _xcr0_accum;
                 if ( _xcr0_accum & XSTATE_NONLAZY )
                     v->arch.nonlazy_xstate_used = 1;
-                memcpy(v->arch.xsave_area, _xsave_area,
-                       evc->size - 2 * sizeof(uint64_t));
+                compress_xsave_states(v, _xsave_area,
+                                      evc->size - 2 * sizeof(uint64_t));
                 vcpu_unpause(v);
             }
             else
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ea982e2..20148f5 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2092,6 +2092,9 @@ static int hvm_load_cpu_ctxt(struct domain *d, 
hvm_domain_context_t *h)
 
         memcpy(v->arch.xsave_area, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
         xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
+        if ( cpu_has_xsaves || cpu_has_xsavec )
+            xsave_area->xsave_hdr.xcomp_bv = XSTATE_FP_SSE |
+                                             XSTATE_COMPACTION_ENABLED;
     }
     else
         memcpy(v->arch.fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
@@ -2161,8 +2164,8 @@ static int hvm_save_cpu_xsave_states(struct domain *d, 
hvm_domain_context_t *h)
         ctxt->xfeature_mask = xfeature_mask;
         ctxt->xcr0 = v->arch.xcr0;
         ctxt->xcr0_accum = v->arch.xcr0_accum;
-        memcpy(&ctxt->save_area, v->arch.xsave_area,
-               size - offsetof(struct hvm_hw_cpu_xsave, save_area));
+        expand_xsave_states(v, &ctxt->save_area,
+                            size - offsetof(typeof(*ctxt), save_area));
     }
 
     return 0;
@@ -2261,9 +2264,9 @@ static int hvm_load_cpu_xsave_states(struct domain *d, 
hvm_domain_context_t *h)
     v->arch.xcr0_accum = ctxt->xcr0_accum;
     if ( ctxt->xcr0_accum & XSTATE_NONLAZY )
         v->arch.nonlazy_xstate_used = 1;
-    memcpy(v->arch.xsave_area, &ctxt->save_area,
-           min(desc->length, size) - offsetof(struct hvm_hw_cpu_xsave,
-           save_area));
+    compress_xsave_states(v, &ctxt->save_area,
+                          min(desc->length, size) -
+                          offsetof(struct hvm_hw_cpu_xsave,save_area));
 
     return 0;
 }
@@ -5503,7 +5506,12 @@ void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, 
uint16_t ip)
     fpu_ctxt->fcw = FCW_RESET;
     fpu_ctxt->mxcsr = MXCSR_DEFAULT;
     if ( v->arch.xsave_area )
+    {
         v->arch.xsave_area->xsave_hdr.xstate_bv = XSTATE_FP;
+        if ( cpu_has_xsaves || cpu_has_xsavec )
+            v->arch.xsave_area->xsave_hdr.xcomp_bv = XSTATE_FP |
+                                                     XSTATE_COMPACTION_ENABLED;
+    }
 
     v->arch.vgc_flags = VGCF_online;
     memset(&v->arch.user_regs, 0, sizeof(v->arch.user_regs));
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 66b51cb..b661d39 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -282,7 +282,11 @@ int vcpu_init_fpu(struct vcpu *v)
         return rc;
 
     if ( v->arch.xsave_area )
+    {
         v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse;
+        if ( cpu_has_xsaves || cpu_has_xsavec )
+            v->arch.xsave_area->xsave_hdr.xcomp_bv = XSTATE_COMPACTION_ENABLED;
+    }
     else
     {
         v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse), 16);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index e21fb78..075f98e 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -939,9 +939,9 @@ void pv_cpuid(struct cpu_user_regs *regs)
             goto unsupported;
         if ( regs->_ecx == 1 )
         {
-            a &= 
boot_cpu_data.x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)];
-            if ( !cpu_has_xsaves )
-                b = c = d = 0;
+            a &= 
(boot_cpu_data.x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)] &
+                  ~cpufeat_mask(X86_FEATURE_XSAVES));
+            b = c = d = 0;
         }
         break;
 
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 827e0e1..f3ae285 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -24,6 +24,11 @@ static u32 __read_mostly xsave_cntxt_size;
 /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
 u64 __read_mostly xfeature_mask;
 
+static unsigned int * __read_mostly xstate_offsets;
+static unsigned int * __read_mostly xstate_sizes;
+static unsigned int __read_mostly xstate_features;
+static unsigned int __read_mostly xstate_comp_offsets[sizeof(xfeature_mask)*8];
+
 /* Cached xcr0 for fast read */
 static DEFINE_PER_CPU(uint64_t, xcr0);
 
@@ -79,6 +84,161 @@ uint64_t get_msr_xss(void)
     return this_cpu(xss);
 }
 
+static bool_t xsave_area_compressed(const struct xsave_struct *xsave_area)
+{
+     return xsave_area && (xsave_area->xsave_hdr.xcomp_bv
+                           & XSTATE_COMPACTION_ENABLED);
+}
+
+static int setup_xstate_features(bool_t bsp)
+{
+    unsigned int leaf, tmp, eax, ebx;
+
+    if ( bsp )
+    {
+        xstate_features = fls(xfeature_mask);
+        xstate_offsets = xzalloc_array(unsigned int, xstate_features);
+        if ( !xstate_offsets )
+            return -ENOMEM;
+
+        xstate_sizes = xzalloc_array(unsigned int, xstate_features);
+        if ( !xstate_sizes )
+            return -ENOMEM;
+    }
+
+    for ( leaf = 2; leaf < xstate_features; leaf++ )
+    {
+        if ( bsp )
+            cpuid_count(XSTATE_CPUID, leaf, &xstate_sizes[leaf],
+                        &xstate_offsets[leaf], &tmp, &tmp);
+        else
+        {
+            cpuid_count(XSTATE_CPUID, leaf, &eax,
+                        &ebx, &tmp, &tmp);
+            BUG_ON(eax != xstate_sizes[leaf]);
+            BUG_ON(ebx != xstate_offsets[leaf]);
+        }
+    }
+
+    return 0;
+}
+
+static void __init setup_xstate_comp(void)
+{
+    unsigned int i;
+
+    /*
+     * The FP xstates and SSE xstates are legacy states. They are always
+     * in the fixed offsets in the xsave area in either compacted form
+     * or standard form.
+     */
+    xstate_comp_offsets[0] = 0;
+    xstate_comp_offsets[1] = XSAVE_SSE_OFFSET;
+
+    xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+    for ( i = 3; i < xstate_features; i++ )
+    {
+        xstate_comp_offsets[i] = xstate_comp_offsets[i - 1] +
+                                 (((1ul << i) & xfeature_mask)
+                                  ? xstate_sizes[i - 1] : 0);
+        ASSERT(xstate_comp_offsets[i] + xstate_sizes[i] <= xsave_cntxt_size);
+    }
+}
+
+static void *get_xsave_addr(void *xsave, unsigned int xfeature_idx)
+{
+    if ( !((1ul << xfeature_idx) & xfeature_mask) )
+        return NULL;
+
+    return xsave + xstate_comp_offsets[xfeature_idx];
+}
+
+void expand_xsave_states(struct vcpu *v, void *dest, unsigned int size)
+{
+    struct xsave_struct *xsave = v->arch.xsave_area;
+    u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
+    u64 valid;
+
+    if ( !cpu_has_xsaves && !cpu_has_xsavec )
+    {
+        memcpy(dest, xsave, size);
+        return;
+    }
+
+    ASSERT(xsave_area_compressed(xsave));
+    /*
+     * Copy legacy XSAVE area and XSAVE hdr area.
+     */
+    memcpy(dest, xsave, XSTATE_AREA_MIN_SIZE);
+
+    ((struct xsave_struct *)dest)->xsave_hdr.xcomp_bv =  0;
+
+    /*
+     * Copy each region from the possibly compacted offset to the
+     * non-compacted offset.
+     */
+    valid = xstate_bv & ~XSTATE_FP_SSE;
+    while ( valid )
+    {
+        u64 feature = valid & -valid;
+        unsigned int index = fls(feature) - 1;
+        const void *src = get_xsave_addr(xsave, index);
+
+        if ( src )
+        {
+            ASSERT((xstate_offsets[index] + xstate_sizes[index]) <= size);
+            memcpy(dest + xstate_offsets[index], src, xstate_sizes[index]);
+        }
+
+        valid &= ~feature;
+    }
+}
+
+void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size)
+{
+    struct xsave_struct *xsave = v->arch.xsave_area;
+    u64 xstate_bv = ((const struct xsave_struct *)src)->xsave_hdr.xstate_bv;
+    u64 valid;
+
+    if ( !cpu_has_xsaves && !cpu_has_xsavec )
+    {
+        memcpy(xsave, src, size);
+        return;
+    }
+
+    ASSERT(!xsave_area_compressed(src));
+    /*
+     * Copy legacy XSAVE area, to avoid complications with CPUID
+     * leaves 0 and 1 in the loop below.
+     */
+    memcpy(xsave, src, FXSAVE_SIZE);
+
+    /* Set XSTATE_BV and XCOMP_BV.  */
+    xsave->xsave_hdr.xstate_bv = xstate_bv;
+    xsave->xsave_hdr.xcomp_bv = v->arch.xcr0_accum | XSTATE_COMPACTION_ENABLED;
+
+    /*
+     * Copy each region from the non-compacted offset to the
+     * possibly compacted offset.
+     */
+    valid = xstate_bv & ~XSTATE_FP_SSE;
+    while ( valid )
+    {
+        u64 feature = valid & -valid;
+        unsigned int index = fls(feature) - 1;
+        void *dest = get_xsave_addr(xsave, index);
+
+        if ( dest )
+        {
+            ASSERT((xstate_offsets[index] + xstate_sizes[index]) <= size);
+            memcpy(dest, src + xstate_offsets[index], xstate_sizes[index]);
+        }
+
+        valid &= ~feature;
+    }
+}
+
 void xsave(struct vcpu *v, uint64_t mask)
 {
     struct xsave_struct *ptr = v->arch.xsave_area;
@@ -91,7 +251,15 @@ void xsave(struct vcpu *v, uint64_t mask)
         typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
         typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;
 
-        if ( cpu_has_xsaveopt )
+        if ( cpu_has_xsaves )
+            asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else if ( cpu_has_xsavec )
+            asm volatile ( ".byte 0x48,0x0f,0xc7,0x27"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else if ( cpu_has_xsaveopt )
         {
             /*
              * xsaveopt may not write the FPU portion even when the respective
@@ -144,7 +312,15 @@ void xsave(struct vcpu *v, uint64_t mask)
     }
     else
     {
-        if ( cpu_has_xsaveopt )
+        if ( cpu_has_xsaves )
+            asm volatile ( ".byte 0x0f,0xc7,0x2f"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else if ( cpu_has_xsavec )
+            asm volatile ( ".byte 0x0f,0xc7,0x27"
+                           : "=m" (*ptr)
+                           : "a" (lmask), "d" (hmask), "D" (ptr) );
+        else if ( cpu_has_xsaveopt )
             asm volatile ( ".byte 0x0f,0xae,0x37"
                            : "=m" (*ptr)
                            : "a" (lmask), "d" (hmask), "D" (ptr) );
@@ -187,36 +363,56 @@ void xrstor(struct vcpu *v, uint64_t mask)
     switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
     {
     default:
-        asm volatile ( "1: .byte 0x48,0x0f,0xae,0x2f\n"
-                       ".section .fixup,\"ax\"      \n"
-                       "2: mov %5,%%ecx             \n"
-                       "   xor %1,%1                \n"
-                       "   rep stosb                \n"
-                       "   lea %2,%0                \n"
-                       "   mov %3,%1                \n"
-                       "   jmp 1b                   \n"
-                       ".previous                   \n"
-                       _ASM_EXTABLE(1b, 2b)
-                       : "+&D" (ptr), "+&a" (lmask)
-                       : "m" (*ptr), "g" (lmask), "d" (hmask),
-                         "m" (xsave_cntxt_size)
-                       : "ecx" );
+        alternative_io( "1: "".byte 0x48,0x0f,0xae,0x2f \n"
+                        ".section .fixup,\"ax\"         \n"
+                        "2: mov %6,%%ecx                \n"
+                        "   xor %1,%1                   \n"
+                        "   rep stosb                   \n"
+                        "   lea %3,%0                   \n"
+                        "   mov %4,%1                   \n"
+                        "   jmp 1b                      \n"
+                        ".previous                      \n"
+                        _ASM_EXTABLE(1b, 2b),
+                        ".byte 0x48,0x0f,0xc7,0x1f      \n"
+                        ".section .fixup,\"ax\"         \n"
+                        "2: mov %6,%%ecx                \n"
+                        "   xor %1,%1                   \n"
+                        "   rep stosb                   \n"
+                        "   lea %3,%0                   \n"
+                        "   mov %4,%1                   \n"
+                        "   jmp 1b                      \n"
+                        ".previous                      \n"
+                        _ASM_EXTABLE(1b, 2b),
+                        X86_FEATURE_XSAVES,
+                        ASM_OUTPUT2("+&D" (ptr), "+&a" (lmask)),
+                        "m" (*ptr), "g" (lmask), "d" (hmask), "m" 
(xsave_cntxt_size)
+                        : "ecx" );
         break;
     case 4: case 2:
-        asm volatile ( "1: .byte 0x0f,0xae,0x2f\n"
-                       ".section .fixup,\"ax\" \n"
-                       "2: mov %5,%%ecx        \n"
-                       "   xor %1,%1           \n"
-                       "   rep stosb           \n"
-                       "   lea %2,%0           \n"
-                       "   mov %3,%1           \n"
-                       "   jmp 1b              \n"
-                       ".previous              \n"
-                       _ASM_EXTABLE(1b, 2b)
-                       : "+&D" (ptr), "+&a" (lmask)
-                       : "m" (*ptr), "g" (lmask), "d" (hmask),
-                         "m" (xsave_cntxt_size)
-                       : "ecx" );
+        alternative_io( "1: "".byte 0x0f,0xae,0x2f \n"
+                        ".section .fixup,\"ax\"    \n"
+                        "2: mov %6,%%ecx           \n"
+                        "   xor %1,%1              \n"
+                        "   rep stosb              \n"
+                        "   lea %3,%0              \n"
+                        "   mov %4,%1              \n"
+                        "   jmp 1b                 \n"
+                        ".previous                 \n"
+                        _ASM_EXTABLE(1b, 2b),
+                        ".byte 0x0f,0xc7,0x1f      \n"
+                        ".section .fixup,\"ax\"    \n"
+                        "2: mov %6,%%ecx           \n"
+                        "   xor %1,%1              \n"
+                        "   rep stosb              \n"
+                        "   lea %3,%0              \n"
+                        "   mov %4,%1              \n"
+                        "   jmp 1b                 \n"
+                        ".previous                 \n"
+                        _ASM_EXTABLE(1b, 2b),
+                        X86_FEATURE_XSAVES,
+                        ASM_OUTPUT2("+&D" (ptr), "+&a" (lmask)),
+                        "m" (*ptr), "g" (lmask), "d" (hmask), "m" 
(xsave_cntxt_size)
+                        : "ecx" );
         break;
     }
 }
@@ -343,11 +539,18 @@ void xstate_init(struct cpuinfo_x86 *c)
 
     /* Mask out features not currently understood by Xen. */
     eax &= (cpufeat_mask(X86_FEATURE_XSAVEOPT) |
-            cpufeat_mask(X86_FEATURE_XSAVEC));
+            cpufeat_mask(X86_FEATURE_XSAVEC) |
+            cpufeat_mask(X86_FEATURE_XGETBV1) |
+            cpufeat_mask(X86_FEATURE_XSAVES));
 
     c->x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)] = eax;
 
     BUG_ON(eax != 
boot_cpu_data.x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)]);
+
+    if ( setup_xstate_features(bsp) && bsp )
+        BUG();
+    if ( bsp && (cpu_has_xsaves || cpu_has_xsavec) )
+        setup_xstate_comp();
 }
 
 static bool_t valid_xcr0(u64 xcr0)
diff --git a/xen/include/asm-x86/alternative.h 
b/xen/include/asm-x86/alternative.h
index 23c9b9f..7ddbd76 100644
--- a/xen/include/asm-x86/alternative.h
+++ b/xen/include/asm-x86/alternative.h
@@ -94,6 +94,9 @@ extern void alternative_instructions(void);
        asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)          \
                : output : "i" (0), ## input)
 
+/* Use this macro(s) if you need more than one output parameter. */
+#define ASM_OUTPUT2(a...) a
+
 #endif  /*  __ASSEMBLY__  */
 
 #endif /* __X86_ALTERNATIVE_H__ */
diff --git a/xen/include/asm-x86/xstate.h b/xen/include/asm-x86/xstate.h
index b95a5b5..414cc99 100644
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -91,6 +91,8 @@ void xrstor(struct vcpu *v, uint64_t mask);
 bool_t xsave_enabled(const struct vcpu *v);
 int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv);
 int __must_check handle_xsetbv(u32 index, u64 new_bv);
+void expand_xsave_states(struct vcpu *v, void *dest, unsigned int size);
+void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size);
 
 /* extended state init and cleanup functions */
 void xstate_free_save_area(struct vcpu *v);
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.