[Xen-devel] [PATCH V4 2/4] x86/xsaves: enable xsaves/xrstors in xen
This patch uses xsaves/xrstors instead of xsaveopt/xrstor
to perform the xsave_area switching, so that Xen itself
can benefit from them when they are available.
xsaves/xrstors operate on the compacted format only, so add
format conversion support for use during guest migration.
Signed-off-by: Shuai Ruan <shuai.ruan@xxxxxxxxxxxxxxx>
---
xen/arch/x86/domain.c | 2 +
xen/arch/x86/domctl.c | 34 ++++++++++++---
xen/arch/x86/hvm/hvm.c | 19 ++++++---
xen/arch/x86/i387.c | 4 ++
xen/arch/x86/traps.c | 7 ++--
xen/arch/x86/xstate.c | 112 ++++++++++++++++++++++++++++++++++---------------
6 files changed, 132 insertions(+), 46 deletions(-)
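Background, not part of the patch: xsave/xsaveopt write the standard
layout, in which each state component sits at the fixed offset reported
by CPUID leaf 0xD, while xsaves packs the enabled components back to
back (the compacted layout). The save_xsave_states()/load_xsave_states()
helpers used below convert between the two so that the migration stream
always carries the standard layout. A rough sketch of the
compacted-to-standard direction, assuming per-component tables
xstate_offsets[], xstate_comp_offsets[] and xstate_sizes[] of the kind
setup_xstate_features()/setup_xstate_comp() build:

    /* Sketch only, not the series' implementation. */
    static void compacted_to_standard(const uint8_t *src, uint8_t *dst,
                                      uint64_t xstate_bv)
    {
        unsigned int i;

        /* The legacy FP/SSE area (512 bytes) and the 64-byte XSAVE
         * header sit at the same offsets in both layouts. */
        memcpy(dst, src, 512 + 64);

        /* Components 0/1 (x87/SSE) live in the legacy area; move the
         * rest from their packed offsets to the architectural ones. */
        for ( i = 2; i < 63; i++ )
            if ( xstate_bv & (1ULL << i) )
                memcpy(dst + xstate_offsets[i],
                       src + xstate_comp_offsets[i],
                       xstate_sizes[i]);

        /* A real implementation must also clear XCOMP_BV in dst's
         * header, so that it describes a standard-format area. */
    }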
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 045f6ff..7b8f649 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1529,6 +1529,8 @@ static void __context_switch(void)
if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
BUG();
}
+ if ( cpu_has_xsaves )
+ wrmsrl(MSR_IA32_XSS, n->arch.msr_ia32_xss);
vcpu_restore_fpu_eager(n);
n->arch.ctxt_switch_to(n);
}
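Background, not part of the patch: xsaves computes the set of components
to save as (XCR0 | IA32_XSS) & EDX:EAX, so IA32_XSS (MSR 0xDA0) is
per-vCPU state and must follow the vCPU across context switches, which
is what the hunk above adds (msr_ia32_xss is assumed to be the field
introduced elsewhere in this series). In sketch form:

    /* Sketch only: the requested-feature bitmap xsaves acts on,
     * per the SDM description of XSAVES. */
    static uint64_t xsaves_rfbm(uint64_t xcr0, uint64_t ia32_xss,
                                uint64_t instr_mask /* EDX:EAX */)
    {
        return (xcr0 | ia32_xss) & instr_mask;
    }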
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index bf62a88..da136876 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -867,6 +867,7 @@ long arch_do_domctl(
if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate )
{
unsigned int size;
+ void *xsave_area;
ret = 0;
vcpu_pause(v);
@@ -896,9 +897,28 @@ long arch_do_domctl(
ret = -EFAULT;
offset += sizeof(v->arch.xcr0_accum);
- if ( !ret && copy_to_guest_offset(evc->buffer, offset,
- (void *)v->arch.xsave_area,
- size - 2 * sizeof(uint64_t)) )
+
+ if ( cpu_has_xsaves )
+ {
+ xsave_area = xmalloc_bytes(size);
+ if ( !xsave_area )
+ {
+ ret = -ENOMEM;
+ goto vcpuextstate_out;
+ }
+
+ save_xsave_states(v, xsave_area,
+ size - 2 * sizeof(uint64_t));
+
+ if ( !ret && copy_to_guest_offset(evc->buffer, offset,
+ xsave_area, size -
+ 2 * sizeof(uint64_t)) )
+ ret = -EFAULT;
+ xfree(xsave_area);
+ }
+ else if ( !ret && copy_to_guest_offset(evc->buffer, offset,
+ (void *)v->arch.xsave_area,
+ size - 2 * sizeof(uint64_t)) )
ret = -EFAULT;
vcpu_unpause(v);
@@ -954,8 +974,12 @@ long arch_do_domctl(
v->arch.xcr0_accum = _xcr0_accum;
if ( _xcr0_accum & XSTATE_NONLAZY )
v->arch.nonlazy_xstate_used = 1;
- memcpy(v->arch.xsave_area, _xsave_area,
- evc->size - 2 * sizeof(uint64_t));
+ if ( cpu_has_xsaves )
+ load_xsave_states(v, (void *)_xsave_area,
+ evc->size - 2 * sizeof(uint64_t));
+ else
+ memcpy(v->arch.xsave_area, (void *)_xsave_area,
+ evc->size - 2 * sizeof(uint64_t));
vcpu_unpause(v);
}
else
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index c957610..dc444ac 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2148,8 +2148,12 @@ static int hvm_save_cpu_xsave_states(struct domain *d,
hvm_domain_context_t *h)
ctxt->xfeature_mask = xfeature_mask;
ctxt->xcr0 = v->arch.xcr0;
ctxt->xcr0_accum = v->arch.xcr0_accum;
- memcpy(&ctxt->save_area, v->arch.xsave_area,
- size - offsetof(struct hvm_hw_cpu_xsave, save_area));
+ if ( cpu_has_xsaves )
+ save_xsave_states(v, (void *)&ctxt->save_area,
+ size - offsetof(struct hvm_hw_cpu_xsave, save_area));
+ else
+ memcpy(&ctxt->save_area, v->arch.xsave_area,
+ size - offsetof(struct hvm_hw_cpu_xsave, save_area));
}
return 0;
@@ -2248,9 +2252,14 @@ static int hvm_load_cpu_xsave_states(struct domain *d,
hvm_domain_context_t *h)
v->arch.xcr0_accum = ctxt->xcr0_accum;
if ( ctxt->xcr0_accum & XSTATE_NONLAZY )
v->arch.nonlazy_xstate_used = 1;
- memcpy(v->arch.xsave_area, &ctxt->save_area,
- min(desc->length, size) - offsetof(struct hvm_hw_cpu_xsave,
- save_area));
+ if ( cpu_has_xsaves )
+ load_xsave_states(v, (void *)&ctxt->save_area,
+ min(desc->length, size) -
+ offsetof(struct hvm_hw_cpu_xsave, save_area));
+ else
+ memcpy(v->arch.xsave_area, &ctxt->save_area,
+ min(desc->length, size) - offsetof(struct hvm_hw_cpu_xsave,
+ save_area));
return 0;
}
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 14f2a79..b60b194 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -309,7 +309,11 @@ int vcpu_init_fpu(struct vcpu *v)
return rc;
if ( v->arch.xsave_area )
+ {
v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse;
+ if ( cpu_has_xsaves )
+ v->arch.xsave_area->xsave_hdr.xcomp_bv |= XSTATE_COMPACTION_ENABLED;
+ }
else
{
v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse), 16);
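Background, not part of the patch: bit 63 of the XSAVE header's
XCOMP_BV field is what marks a save area as compacted-format; xrstors
raises #GP if it is clear, which is why the area is tagged at
initialisation time above. XSTATE_COMPACTION_ENABLED is assumed to be
defined elsewhere in this series as:

    /* Assumption: the compaction marker, XCOMP_BV bit 63. */
    #define XSTATE_COMPACTION_ENABLED  (1ULL << 63)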
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 9f5a6c6..e9beec1 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -936,9 +936,10 @@ void pv_cpuid(struct cpu_user_regs *regs)
if ( regs->_ecx == 1 )
{
a &= XSTATE_FEATURE_XSAVEOPT |
- XSTATE_FEATURE_XSAVEC |
- (cpu_has_xgetbv1 ? XSTATE_FEATURE_XGETBV1 : 0) |
- (cpu_has_xsaves ? XSTATE_FEATURE_XSAVES : 0);
+ XSTATE_FEATURE_XSAVEC;
+ /* PV guests do not support xsaves. */
+ /* (cpu_has_xgetbv1 ? XSTATE_FEATURE_XGETBV1 : 0) |
+ (cpu_has_xsaves ? XSTATE_FEATURE_XSAVES : 0); */
if ( !cpu_has_xsaves )
b = c = d = 0;
}
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 3986515..9050607 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -214,6 +214,11 @@ void xsave(struct vcpu *v, uint64_t mask)
typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel;
typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel;
+ if ( cpu_has_xsaves )
+ asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
+ : "=m" (*ptr)
+ : "a" (lmask), "d" (hmask), "D" (ptr) );
+ else
if ( cpu_has_xsaveopt )
{
/*
@@ -267,6 +272,11 @@ void xsave(struct vcpu *v, uint64_t mask)
}
else
{
+ if ( cpu_has_xsaves )
+ asm volatile ( ".byte 0x48,0x0f,0xc7,0x2f"
+ : "=m" (*ptr)
+ : "a" (lmask), "d" (hmask), "D" (ptr) );
+ else
if ( cpu_has_xsaveopt )
asm volatile ( ".byte 0x0f,0xae,0x37"
: "=m" (*ptr)
@@ -310,36 +320,68 @@ void xrstor(struct vcpu *v, uint64_t mask)
switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
{
default:
- asm volatile ( "1: .byte 0x48,0x0f,0xae,0x2f\n"
- ".section .fixup,\"ax\" \n"
- "2: mov %5,%%ecx \n"
- " xor %1,%1 \n"
- " rep stosb \n"
- " lea %2,%0 \n"
- " mov %3,%1 \n"
- " jmp 1b \n"
- ".previous \n"
- _ASM_EXTABLE(1b, 2b)
- : "+&D" (ptr), "+&a" (lmask)
- : "m" (*ptr), "g" (lmask), "d" (hmask),
- "m" (xsave_cntxt_size)
- : "ecx" );
- break;
+ if ( cpu_has_xsaves )
+ asm volatile ( "1: .byte 0x48,0x0f,0xc7,0x1f\n"
+ ".section .fixup,\"ax\" \n"
+ "2: mov %5,%%ecx \n"
+ " xor %1,%1 \n"
+ " rep stosb \n"
+ " lea %2,%0 \n"
+ " mov %3,%1 \n"
+ " jmp 1b \n"
+ ".previous \n"
+ _ASM_EXTABLE(1b, 2b)
+ : "+&D" (ptr), "+&a" (lmask)
+ : "m" (*ptr), "g" (lmask), "d" (hmask),
+ "m" (xsave_cntxt_size)
+ : "ecx" );
+ else
+ asm volatile ( "1: .byte 0x48,0x0f,0xae,0x2f\n"
+ ".section .fixup,\"ax\" \n"
+ "2: mov %5,%%ecx \n"
+ " xor %1,%1 \n"
+ " rep stosb \n"
+ " lea %2,%0 \n"
+ " mov %3,%1 \n"
+ " jmp 1b \n"
+ ".previous \n"
+ _ASM_EXTABLE(1b, 2b)
+ : "+&D" (ptr), "+&a" (lmask)
+ : "m" (*ptr), "g" (lmask), "d" (hmask),
+ "m" (xsave_cntxt_size)
+ : "ecx" );
+ break;
case 4: case 2:
- asm volatile ( "1: .byte 0x0f,0xae,0x2f\n"
- ".section .fixup,\"ax\" \n"
- "2: mov %5,%%ecx \n"
- " xor %1,%1 \n"
- " rep stosb \n"
- " lea %2,%0 \n"
- " mov %3,%1 \n"
- " jmp 1b \n"
- ".previous \n"
- _ASM_EXTABLE(1b, 2b)
- : "+&D" (ptr), "+&a" (lmask)
- : "m" (*ptr), "g" (lmask), "d" (hmask),
- "m" (xsave_cntxt_size)
- : "ecx" );
+ if ( cpu_has_xsaves )
+ asm volatile ( "1: .byte 0x48,0x0f,0xc7,0x1f\n"
+ ".section .fixup,\"ax\" \n"
+ "2: mov %5,%%ecx \n"
+ " xor %1,%1 \n"
+ " rep stosb \n"
+ " lea %2,%0 \n"
+ " mov %3,%1 \n"
+ " jmp 1b \n"
+ ".previous \n"
+ _ASM_EXTABLE(1b, 2b)
+ : "+&D" (ptr), "+&a" (lmask)
+ : "m" (*ptr), "g" (lmask), "d" (hmask),
+ "m" (xsave_cntxt_size)
+ : "ecx" );
+ else
+ asm volatile ( "1: .byte 0x0f,0xae,0x2f\n"
+ ".section .fixup,\"ax\" \n"
+ "2: mov %5,%%ecx \n"
+ " xor %1,%1 \n"
+ " rep stosb \n"
+ " lea %2,%0 \n"
+ " mov %3,%1 \n"
+ " jmp 1b \n"
+ ".previous \n"
+ _ASM_EXTABLE(1b, 2b)
+ : "+&D" (ptr), "+&a" (lmask)
+ : "m" (*ptr), "g" (lmask), "d" (hmask),
+ "m" (xsave_cntxt_size)
+ : "ecx" );
break;
}
}
@@ -466,16 +508,20 @@ void xstate_init(bool_t bsp)
{
cpu_has_xsaveopt = !!(eax & XSTATE_FEATURE_XSAVEOPT);
cpu_has_xsavec = !!(eax & XSTATE_FEATURE_XSAVEC);
- /* XXX cpu_has_xgetbv1 = !!(eax & XSTATE_FEATURE_XGETBV1); */
- /* XXX cpu_has_xsaves = !!(eax & XSTATE_FEATURE_XSAVES); */
+ cpu_has_xgetbv1 = !!(eax & XSTATE_FEATURE_XGETBV1);
+ cpu_has_xsaves = !!(eax & XSTATE_FEATURE_XSAVES);
}
else
{
BUG_ON(!cpu_has_xsaveopt != !(eax & XSTATE_FEATURE_XSAVEOPT));
BUG_ON(!cpu_has_xsavec != !(eax & XSTATE_FEATURE_XSAVEC));
- /* XXX BUG_ON(!cpu_has_xgetbv1 != !(eax & XSTATE_FEATURE_XGETBV1)); */
- /* XXX BUG_ON(!cpu_has_xsaves != !(eax & XSTATE_FEATURE_XSAVES)); */
+ BUG_ON(!cpu_has_xgetbv1 != !(eax & XSTATE_FEATURE_XGETBV1));
+ BUG_ON(!cpu_has_xsaves != !(eax & XSTATE_FEATURE_XSAVES));
}
+
+ setup_xstate_features();
+ if ( cpu_has_xsaves )
+ setup_xstate_comp();
}
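For reference, the hand-assembled byte sequences used in xstate.c above
decode as follows (the 0x48 REX.W prefix selects the 64-bit forms):

    /* .byte 0x48,0x0f,0xc7,0x2f => xsaves64 (%rdi)   (0f c7 /5)
     * .byte 0x48,0x0f,0xc7,0x1f => xrstors64 (%rdi)  (0f c7 /3)
     * .byte 0x48,0x0f,0xae,0x2f => xrstor64 (%rdi)   (0f ae /5)
     * .byte 0x0f,0xae,0x37      => xsaveopt (%rdi)   (0f ae /6)
     */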
static bool_t valid_xcr0(u64 xcr0)
--
1.9.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel