|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Xen-devel] [PATCH RFC] x86/xsave: prefer eager clearing of state over eager restoring
Unlike FXRSTOR, XRSTOR allows for setting components to their
initial state. Utilize this to clear register state immediately after
having saved a vCPU's state (which we don't defer past
__context_switch()), considering that
- this supposedly reduces power consumption,
- this might even free up physical registers,
- we don't normally save/restore FPU state for a vCPU on every context
switch (in some initial measurements I've observed an approximate
50:50 relation between the two on a not overly heavily loaded system;
it's clear anyway that this is heavily dependent on what exactly a
vCPU is used for).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
RFC since the full performance effect is still not very clear.
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -33,6 +33,7 @@ static inline void fpu_xrstor(struct vcp
ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
ASSERT(ok);
xrstor(v, mask);
+ v->arch.xstate_dirty = mask;
ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
ASSERT(ok);
}
@@ -148,6 +149,9 @@ static inline void fpu_xsave(struct vcpu
ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
ASSERT(ok);
xsave(v, mask);
+ xstate_load_init(v->arch.xstate_dirty &
+ v->arch.xsave_area->xsave_hdr.xstate_bv);
+ v->arch.xstate_dirty = 0;
ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
ASSERT(ok);
}
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -616,7 +616,7 @@ void __init init_speculation_mitigations
/* Check whether Eager FPU should be enabled by default. */
if ( opt_eager_fpu == -1 )
- opt_eager_fpu = should_use_eager_fpu();
+ opt_eager_fpu = !cpu_has_xsave && should_use_eager_fpu();
/* (Re)init BSP state now that default_spec_ctrl_flags has been
calculated. */
init_shadow_spec_ctrl_state();
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -734,6 +734,7 @@ int handle_xsetbv(u32 index, u64 new_bv)
cr0 &= ~X86_CR0_TS;
}
xrstor(curr, mask);
+ curr->arch.xstate_dirty |= mask;
if ( cr0 & X86_CR0_TS )
write_cr0(cr0);
}
@@ -774,12 +775,19 @@ uint64_t read_bndcfgu(void)
return xstate->xsave_hdr.xstate_bv & X86_XCR0_BNDCSR ? bndcsr->bndcfgu : 0;
}
+void xstate_load_init(uint64_t mask)
+{
+ struct vcpu *v = idle_vcpu[smp_processor_id()];
+ struct xsave_struct *xstate = v->arch.xsave_area;
+
+ memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr));
+ xrstor(v, mask);
+}
+
void xstate_set_init(uint64_t mask)
{
unsigned long cr0 = read_cr0();
unsigned long xcr0 = this_cpu(xcr0);
- struct vcpu *v = idle_vcpu[smp_processor_id()];
- struct xsave_struct *xstate = v->arch.xsave_area;
if ( ~xfeature_mask & mask )
{
@@ -792,8 +800,7 @@ void xstate_set_init(uint64_t mask)
clts();
- memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr));
- xrstor(v, mask);
+ xstate_load_init(mask);
if ( cr0 & X86_CR0_TS )
write_cr0(cr0);
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -559,6 +559,11 @@ struct arch_vcpu
* it explicitly enables it via xcr0.
*/
uint64_t xcr0_accum;
+ /*
+ * Accumulated set of components which may currently be dirty, and hence
+ * should be cleared immediately after saving state.
+ */
+ uint64_t xstate_dirty;
/* This variable determines whether nonlazy extended state has been used,
* and thus should be saved/restored. */
bool_t nonlazy_xstate_used;
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -95,6 +95,7 @@ uint64_t get_msr_xss(void);
uint64_t read_bndcfgu(void);
void xsave(struct vcpu *v, uint64_t mask);
void xrstor(struct vcpu *v, uint64_t mask);
+void xstate_load_init(uint64_t mask);
void xstate_set_init(uint64_t mask);
bool xsave_enabled(const struct vcpu *v);
int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum,
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |