Re: [Xen-devel] [PATCH] x86/xsave: fix migration from xsave-capable to xsave-incapable host
On 30/08/2013 10:55, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:
> With CPUID features suitably masked this is supposed to work, but was
> completely broken (i.e. the case wasn't even considered when the
> original xsave save/restore code was written).
>
> First of all, xsave_enabled() wrongly returned the value of
> cpu_has_xsave, i.e. not even taking into consideration attributes of
> the vCPU in question. Instead this function ought to check whether the
> guest ever enabled xsave support (by writing a [non-zero] value to
> XCR0). As a result of this, a vCPU's xcr0 and xcr0_accum must no longer
> be initialized to XSTATE_FP_SSE (since that's a valid value a guest
> could write to XCR0), and the xsave/xrstor as well as the context
> switch code need to suitably account for this (by always enforcing at
> least this part of the state to be saved/loaded).
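
For illustration only (not part of the patch): a minimal, self-contained sketch of the semantics described above, using made-up toy_* names in place of the real Xen structures. "Guest ever enabled xsave" is tracked as a non-zero accumulated XCR0, and the context switch falls back to FP|SSE while the guest's own XCR0 is still zero.

    /* Illustrative stand-ins only -- not the Xen structures. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define XSTATE_FP      (1ULL << 0)
    #define XSTATE_SSE     (1ULL << 1)
    #define XSTATE_FP_SSE  (XSTATE_FP | XSTATE_SSE)

    struct toy_vcpu {
        uint64_t xcr0;        /* last value the guest wrote to XCR0 (0 = never) */
        uint64_t xcr0_accum;  /* union of all values ever written to XCR0 */
    };

    /* "Has the guest ever enabled xsave?" == non-zero accumulated XCR0. */
    static bool toy_xsave_enabled(const struct toy_vcpu *v)
    {
        return v->xcr0_accum != 0;
    }

    /* XCR0 to load on context switch: at least FP|SSE must always be active. */
    static uint64_t toy_ctxt_switch_xcr0(const struct toy_vcpu *v)
    {
        return v->xcr0 ? v->xcr0 : XSTATE_FP_SSE;
    }

    int main(void)
    {
        struct toy_vcpu v = { 0, 0 };

        /* Fresh vCPU: not xsave-enabled, yet FP|SSE still gets restored. */
        printf("enabled=%d xcr0=%#llx\n", toy_xsave_enabled(&v),
               (unsigned long long)toy_ctxt_switch_xcr0(&v));

        v.xcr0 = XSTATE_FP_SSE;        /* guest executes XSETBV */
        v.xcr0_accum |= v.xcr0;
        printf("enabled=%d xcr0=%#llx\n", toy_xsave_enabled(&v),
               (unsigned long long)toy_ctxt_switch_xcr0(&v));
        return 0;
    }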
>
> This involves undoing large parts of c/s 22945:13a7d1f7f62c ("x86: add
> strictly sanity check for XSAVE/XRSTOR") - we need to cleanly
> distinguish between hardware capabilities and vCPU used features.
>
> Next both HVM and PV save code needed tweaking to not always save the
> full state supported by the underlying hardware, but just the parts
> that the guest actually used. Similarly the restore code should bail
> not just on state being restored that the hardware cannot handle, but
> also on inconsistent save state (inconsistent XCR0 settings or size of
> saved state not in line with XCR0).
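
As an aside (again not part of the patch), the restore-side consistency rules amount to roughly the sketch below; the real checks are in validate_xstate() further down, this just restates them with a host_feat_mask parameter standing in for the restoring host's xfeature_mask. The "size of saved state" check is handled separately, by comparing the record size against the area size derived from xcr0_accum.

    #include <errno.h>
    #include <stdint.h>

    #define XSTATE_FP   (1ULL << 0)
    #define XSTATE_SSE  (1ULL << 1)
    #define XSTATE_YMM  (1ULL << 2)

    /* Sketch of the restore-side sanity checks (cf. validate_xstate() below). */
    static int toy_validate_xstate(uint64_t xcr0, uint64_t xcr0_accum,
                                   uint64_t xstate_bv, uint64_t src_feat_mask,
                                   uint64_t host_feat_mask)
    {
        if ( (xcr0_accum & ~src_feat_mask) ||  /* accum beyond saving host's mask */
             (xstate_bv & ~xcr0_accum) ||      /* saved components beyond accum   */
             (xcr0 & ~xcr0_accum) ||           /* current XCR0 beyond accum       */
             !(xcr0 & XSTATE_FP) ||            /* x87 state must always be set    */
             ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) ||
             ((xcr0_accum & XSTATE_YMM) && !(xcr0_accum & XSTATE_SSE)) )
            return -EINVAL;                    /* inconsistent save image */

        if ( xcr0_accum & ~host_feat_mask )
            return -EOPNOTSUPP;                /* restoring host can't handle it */

        return 0;
    }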
>
> And finally the PV extended context get/set code needs to use slightly
> different logic than the HVM one, as here we can't just key off of
> xsave_enabled() (i.e. avoid doing anything if a guest doesn't use
> xsave) because the tools use this function to determine host
> capabilities as well as read/write vCPU state. The set operation in
> particular needs to be capable of cleanly dealing with input that
> consists of only the xcr0 and xcr0_accum values (if they're both zero
> then no further data is required).
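
For illustration only, with hypothetical names: the acceptable shapes of input to the set operation boil down to roughly the following decision; payload_size is whatever follows the two leading uint64_t fields.

    #include <errno.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Stand-in for XSTATE_AREA_MIN_SIZE (legacy FXSAVE area + xsave header). */
    #define TOY_XSTATE_AREA_MIN  (512 + 64)

    static int toy_pv_set_xstate(uint64_t xcr0, uint64_t xcr0_accum,
                                 size_t payload_size)
    {
        if ( xcr0_accum )
            /* The real code additionally runs the XCR0 consistency checks and
             * then copies the XSAVE image into the vCPU's save area. */
            return payload_size >= TOY_XSTATE_AREA_MIN ? 0 : -EINVAL;

        if ( !xcr0 )
            return 0;   /* header-only input: both values zero, no image needed */

        return -EINVAL; /* xcr0 != 0 with xcr0_accum == 0 is inconsistent */
    }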
>
> While for things to work correctly both sides (saving _and_ restoring
> host) need to run with the fixed code, afaict no breakage should occur
> if either side isn't up to date (other than the breakage that this
> patch attempts to fix).
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: Keir Fraser <keir@xxxxxxx>
> ---
> Please note that the development and testing of this was done on 4.1.x,
> as that's where the issue was found. The xsave code being quite
> different between then and now, the porting was not really
> straightforward, but in the end the biggest difference was that the
> 4.1.x code needs more changes than presented here, due to the FPU
> context and XSAVE area not sharing the same memory block there. Hence I
> think/hope I did all the porting correctly, but I had no setup
> available where I could have fully tested all the scenarios myself.
>
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -618,7 +618,7 @@ unsigned long pv_guest_cr4_fixup(const s
> hv_cr4_mask &= ~X86_CR4_DE;
> if ( cpu_has_fsgsbase && !is_pv_32bit_domain(v->domain) )
> hv_cr4_mask &= ~X86_CR4_FSGSBASE;
> - if ( xsave_enabled(v) )
> + if ( cpu_has_xsave )
> hv_cr4_mask &= ~X86_CR4_OSXSAVE;
>
> if ( (guest_cr4 & hv_cr4_mask) != (hv_cr4 & hv_cr4_mask) )
> @@ -1351,9 +1351,13 @@ static void __context_switch(void)
> if ( !is_idle_vcpu(n) )
> {
> memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
> - if ( xsave_enabled(n) && n->arch.xcr0 != get_xcr0() &&
> - !set_xcr0(n->arch.xcr0) )
> - BUG();
> + if ( cpu_has_xsave )
> + {
> + u64 xcr0 = n->arch.xcr0 ?: XSTATE_FP_SSE;
> +
> + if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
> + BUG();
> + }
> vcpu_restore_fpu_eager(n);
> n->arch.ctxt_switch_to(n);
> }
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -1047,11 +1047,8 @@ long arch_do_domctl(
> struct xen_domctl_vcpuextstate *evc;
> struct vcpu *v;
> uint32_t offset = 0;
> - uint64_t _xfeature_mask = 0;
> - uint64_t _xcr0, _xcr0_accum;
> - void *receive_buf = NULL, *_xsave_area;
>
> -#define PV_XSAVE_SIZE (2 * sizeof(uint64_t) + xsave_cntxt_size)
> +#define PV_XSAVE_SIZE(xcr0) (2 * sizeof(uint64_t) + xstate_ctxt_size(xcr0))
>
> evc = &domctl->u.vcpuextstate;
>
> @@ -1062,15 +1059,16 @@ long arch_do_domctl(
>
> if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate )
> {
> + unsigned int size = PV_XSAVE_SIZE(v->arch.xcr0_accum);
> +
> if ( !evc->size && !evc->xfeature_mask )
> {
> evc->xfeature_mask = xfeature_mask;
> - evc->size = PV_XSAVE_SIZE;
> + evc->size = size;
> ret = 0;
> goto vcpuextstate_out;
> }
> - if ( evc->size != PV_XSAVE_SIZE ||
> - evc->xfeature_mask != xfeature_mask )
> + if ( evc->size != size || evc->xfeature_mask != xfeature_mask )
> {
> ret = -EINVAL;
> goto vcpuextstate_out;
> @@ -1093,7 +1091,7 @@ long arch_do_domctl(
> offset += sizeof(v->arch.xcr0_accum);
> if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer,
> offset, (void *)v->arch.xsave_area,
> - xsave_cntxt_size) )
> + size - 2 * sizeof(uint64_t)) )
> {
> ret = -EFAULT;
> goto vcpuextstate_out;
> @@ -1101,13 +1099,14 @@ long arch_do_domctl(
> }
> else
> {
> - ret = -EINVAL;
> + void *receive_buf;
> + uint64_t _xcr0, _xcr0_accum;
> + const struct xsave_struct *_xsave_area;
>
> - _xfeature_mask = evc->xfeature_mask;
> - /* xsave context must be restored on compatible target CPUs */
> - if ( (_xfeature_mask & xfeature_mask) != _xfeature_mask )
> - goto vcpuextstate_out;
> - if ( evc->size > PV_XSAVE_SIZE || evc->size < 2 * sizeof(uint64_t) )
> + ret = -EINVAL;
> + if ( evc->size < 2 * sizeof(uint64_t) ||
> + evc->size > 2 * sizeof(uint64_t) +
> + xstate_ctxt_size(xfeature_mask) )
> goto vcpuextstate_out;
>
> receive_buf = xmalloc_bytes(evc->size);
> @@ -1128,20 +1127,30 @@ long arch_do_domctl(
> _xcr0_accum = *(uint64_t *)(receive_buf + sizeof(uint64_t));
> _xsave_area = receive_buf + 2 * sizeof(uint64_t);
>
> - if ( !(_xcr0 & XSTATE_FP) || _xcr0 & ~xfeature_mask )
> + if ( _xcr0_accum )
> {
> - xfree(receive_buf);
> - goto vcpuextstate_out;
> + if ( evc->size >= 2 * sizeof(uint64_t) + XSTATE_AREA_MIN_SIZE )
> + ret = validate_xstate(_xcr0, _xcr0_accum,
> + _xsave_area->xsave_hdr.xstate_bv,
> + evc->xfeature_mask);
> }
> - if ( (_xcr0 & _xcr0_accum) != _xcr0 )
> + else if ( !_xcr0 )
> + ret = 0;
> + if ( ret )
> {
> xfree(receive_buf);
> goto vcpuextstate_out;
> }
>
> - v->arch.xcr0 = _xcr0;
> - v->arch.xcr0_accum = _xcr0_accum;
> - memcpy(v->arch.xsave_area, _xsave_area, evc->size - 2 * sizeof(uint64_t) );
> + if ( evc->size <= PV_XSAVE_SIZE(_xcr0_accum) )
> + {
> + v->arch.xcr0 = _xcr0;
> + v->arch.xcr0_accum = _xcr0_accum;
> + memcpy(v->arch.xsave_area, _xsave_area,
> + evc->size - 2 * sizeof(uint64_t));
> + }
> + else
> + ret = -EINVAL;
>
> xfree(receive_buf);
> }
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -906,14 +906,12 @@ static int hvm_load_cpu_ctxt(struct doma
> hvm_set_segment_register(v, x86_seg_ldtr, &seg);
>
> /* In case xsave-absent save file is restored on a xsave-capable host */
> - if ( xsave_enabled(v) )
> + if ( cpu_has_xsave && !xsave_enabled(v) )
> {
> struct xsave_struct *xsave_area = v->arch.xsave_area;
>
> memcpy(v->arch.xsave_area, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
> xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
> - v->arch.xcr0_accum = XSTATE_FP_SSE;
> - v->arch.xcr0 = XSTATE_FP_SSE;
> }
> else
> memcpy(v->arch.fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
> @@ -957,7 +955,9 @@ static int hvm_load_cpu_ctxt(struct doma
> HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
> 1, HVMSR_PER_VCPU);
>
> -#define HVM_CPU_XSAVE_SIZE (3 * sizeof(uint64_t) + xsave_cntxt_size)
> +#define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \
> + save_area) + \
> + xstate_ctxt_size(xcr0))
>
> static int hvm_save_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
> {
> @@ -969,20 +969,20 @@ static int hvm_save_cpu_xsave_states(str
>
> for_each_vcpu ( d, v )
> {
> + unsigned int size = HVM_CPU_XSAVE_SIZE(v->arch.xcr0_accum);
> +
> if ( !xsave_enabled(v) )
> continue;
> - if ( _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, HVM_CPU_XSAVE_SIZE) )
> + if ( _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, size) )
> return 1;
> ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur];
> - h->cur += HVM_CPU_XSAVE_SIZE;
> - memset(ctxt, 0, HVM_CPU_XSAVE_SIZE);
> + h->cur += size;
>
> ctxt->xfeature_mask = xfeature_mask;
> ctxt->xcr0 = v->arch.xcr0;
> ctxt->xcr0_accum = v->arch.xcr0_accum;
> - if ( v->fpu_initialised )
> - memcpy(&ctxt->save_area,
> - v->arch.xsave_area, xsave_cntxt_size);
> + memcpy(&ctxt->save_area, v->arch.xsave_area,
> + size - offsetof(struct hvm_hw_cpu_xsave, save_area));
> }
>
> return 0;
> @@ -990,11 +990,11 @@ static int hvm_save_cpu_xsave_states(str
>
> static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
> {
> - int vcpuid;
> + unsigned int vcpuid, size;
> + int err;
> struct vcpu *v;
> struct hvm_hw_cpu_xsave *ctxt;
> struct hvm_save_descriptor *desc;
> - uint64_t _xfeature_mask;
>
> /* Which vcpu is this? */
> vcpuid = hvm_load_instance(h);
> @@ -1006,47 +1006,74 @@ static int hvm_load_cpu_xsave_states(str
> }
>
> /* Fails since we can't restore an img saved on xsave-capable host. */
> - if ( !xsave_enabled(v) )
> - return -EINVAL;
> + if ( !cpu_has_xsave )
> + return -EOPNOTSUPP;
>
> /* Customized checking for entry since our entry is of variable length */
> desc = (struct hvm_save_descriptor *)&h->data[h->cur];
> if ( sizeof (*desc) > h->size - h->cur)
> {
> printk(XENLOG_G_WARNING
> - "HVM%d restore: not enough data left to read descriptor"
> - "for type %u\n", d->domain_id, CPU_XSAVE_CODE);
> - return -1;
> + "HVM%d.%d restore: not enough data left to read xsave
> descriptor\n",
> + d->domain_id, vcpuid);
> + return -ENODATA;
> }
> if ( desc->length + sizeof (*desc) > h->size - h->cur)
> {
> printk(XENLOG_G_WARNING
> - "HVM%d restore: not enough data left to read %u bytes "
> - "for type %u\n", d->domain_id, desc->length, CPU_XSAVE_CODE);
> - return -1;
> + "HVM%d.%d restore: not enough data left to read %u xsave
> bytes\n",
> + d->domain_id, vcpuid, desc->length);
> + return -ENODATA;
> + }
> + if ( desc->length < offsetof(struct hvm_hw_cpu_xsave, save_area) +
> + XSTATE_AREA_MIN_SIZE )
> + {
> + printk(XENLOG_G_WARNING
> + "HVM%d.%d restore mismatch: xsave length %u < %zu\n",
> + d->domain_id, vcpuid, desc->length,
> + offsetof(struct hvm_hw_cpu_xsave,
> + save_area) + XSTATE_AREA_MIN_SIZE);
> + return -EINVAL;
> }
> - if ( CPU_XSAVE_CODE != desc->typecode || (desc->length > HVM_CPU_XSAVE_SIZE) )
> + size = HVM_CPU_XSAVE_SIZE(xfeature_mask);
> + if ( desc->length > size )
> {
> printk(XENLOG_G_WARNING
> - "HVM%d restore mismatch: expected type %u with max length %u,
> "
> - "saw type %u length %u\n", d->domain_id, CPU_XSAVE_CODE,
> - (unsigned int)HVM_CPU_XSAVE_SIZE,
> - desc->typecode, desc->length);
> - return -1;
> + "HVM%d.%d restore mismatch: xsave length %u > %u\n",
> + d->domain_id, vcpuid, desc->length, size);
> + return -EOPNOTSUPP;
> }
> h->cur += sizeof (*desc);
> - /* Checking finished */
>
> ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur];
> h->cur += desc->length;
>
> - _xfeature_mask = ctxt->xfeature_mask;
> - if ( (_xfeature_mask & xfeature_mask) != _xfeature_mask )
> - return -EINVAL;
> + err = validate_xstate(ctxt->xcr0, ctxt->xcr0_accum,
> + ctxt->save_area.xsave_hdr.xstate_bv,
> + ctxt->xfeature_mask);
> + if ( err )
> + {
> + printk(XENLOG_G_WARNING
> + "HVM%d.%d restore: inconsistent xsave state (feat=%#"PRIx64
> + " accum=%#"PRIx64" xcr0=%#"PRIx64" bv=%#"PRIx64" err=%d)\n",
> + d->domain_id, vcpuid, ctxt->xfeature_mask, ctxt->xcr0_accum,
> + ctxt->xcr0, ctxt->save_area.xsave_hdr.xstate_bv, err);
> + return err;
> + }
> + size = HVM_CPU_XSAVE_SIZE(ctxt->xcr0_accum);
> + if ( desc->length > size )
> + {
> + printk(XENLOG_G_WARNING
> + "HVM%d.%d restore mismatch: xsave length %u > %u\n",
> + d->domain_id, vcpuid, desc->length, size);
> + return -EOPNOTSUPP;
> + }
> + /* Checking finished */
>
> v->arch.xcr0 = ctxt->xcr0;
> v->arch.xcr0_accum = ctxt->xcr0_accum;
> - memcpy(v->arch.xsave_area, &ctxt->save_area, xsave_cntxt_size);
> + memcpy(v->arch.xsave_area, &ctxt->save_area,
> + desc->length - offsetof(struct hvm_hw_cpu_xsave, save_area));
>
> return 0;
> }
> @@ -1060,7 +1087,8 @@ static int __init __hvm_register_CPU_XSA
> "CPU_XSAVE",
> hvm_save_cpu_xsave_states,
> hvm_load_cpu_xsave_states,
> - HVM_CPU_XSAVE_SIZE + sizeof (struct hvm_save_descriptor),
> + HVM_CPU_XSAVE_SIZE(xfeature_mask) +
> + sizeof(struct hvm_save_descriptor),
> HVMSR_PER_VCPU);
> return 0;
> }
> @@ -2767,7 +2795,7 @@ void hvm_cpuid(unsigned int input, unsig
> __clear_bit(X86_FEATURE_APIC & 31, edx);
>
> /* Fix up OSXSAVE. */
> - if ( xsave_enabled(v) )
> + if ( cpu_has_xsave )
> *ecx |= (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE) ?
> cpufeat_mask(X86_FEATURE_OSXSAVE) : 0;
>
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -944,8 +944,7 @@ static int construct_vmcs(struct vcpu *v
> /* Host control registers. */
> v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
> __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
> - __vmwrite(HOST_CR4,
> - mmu_cr4_features | (xsave_enabled(v) ? X86_CR4_OSXSAVE : 0));
> + __vmwrite(HOST_CR4, mmu_cr4_features);
>
> /* Host CS:RIP. */
> __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
> --- a/xen/arch/x86/i387.c
> +++ b/xen/arch/x86/i387.c
> @@ -38,14 +38,15 @@ static inline void fpu_xrstor(struct vcp
> {
> bool_t ok;
>
> + ASSERT(v->arch.xsave_area);
> /*
> * XCR0 normally represents what guest OS set. In case of Xen itself,
> - * we set all supported feature mask before doing save/restore.
> + * we set the accumulated feature mask before doing save/restore.
> */
> - ok = set_xcr0(v->arch.xcr0_accum);
> + ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
> ASSERT(ok);
> xrstor(v, mask);
> - ok = set_xcr0(v->arch.xcr0);
> + ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
> ASSERT(ok);
> }
>
> @@ -137,13 +138,15 @@ static inline void fpu_xsave(struct vcpu
> {
> bool_t ok;
>
> - /* XCR0 normally represents what guest OS set. In case of Xen itself,
> - * we set all accumulated feature mask before doing save/restore.
> + ASSERT(v->arch.xsave_area);
> + /*
> + * XCR0 normally represents what guest OS set. In case of Xen itself,
> + * we set the accumulated feature mask before doing save/restore.
> */
> - ok = set_xcr0(v->arch.xcr0_accum);
> + ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
> ASSERT(ok);
> xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY);
> - ok = set_xcr0(v->arch.xcr0);
> + ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
> ASSERT(ok);
> }
>
> @@ -232,7 +235,7 @@ void vcpu_restore_fpu_lazy(struct vcpu *
> if ( v->fpu_dirtied )
> return;
>
> - if ( xsave_enabled(v) )
> + if ( cpu_has_xsave )
> fpu_xrstor(v, XSTATE_LAZY);
> else if ( v->fpu_initialised )
> {
> @@ -262,7 +265,7 @@ void vcpu_save_fpu(struct vcpu *v)
> /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
> clts();
>
> - if ( xsave_enabled(v) )
> + if ( cpu_has_xsave )
> fpu_xsave(v);
> else if ( cpu_has_fxsr )
> fpu_fxsave(v);
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -817,7 +817,7 @@ static void pv_cpuid(struct cpu_user_reg
> __clear_bit(X86_FEATURE_PDCM % 32, &c);
> __clear_bit(X86_FEATURE_PCID % 32, &c);
> __clear_bit(X86_FEATURE_DCA % 32, &c);
> - if ( !xsave_enabled(current) )
> + if ( !cpu_has_xsave )
> {
> __clear_bit(X86_FEATURE_XSAVE % 32, &c);
> __clear_bit(X86_FEATURE_AVX % 32, &c);
> @@ -842,7 +842,7 @@ static void pv_cpuid(struct cpu_user_reg
> break;
>
> case 0x0000000d: /* XSAVE */
> - if ( !xsave_enabled(current) )
> + if ( !cpu_has_xsave )
> goto unsupported;
> break;
>
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -21,7 +21,7 @@ bool_t __read_mostly cpu_has_xsaveopt;
> * the supported and enabled features on the processor, including the
> * XSAVE.HEADER. We only enable XCNTXT_MASK that we have known.
> */
> -u32 xsave_cntxt_size;
> +static u32 __read_mostly xsave_cntxt_size;
>
> /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
> u64 xfeature_mask;
> @@ -206,13 +206,13 @@ void xrstor(struct vcpu *v, uint64_t mas
>
> bool_t xsave_enabled(const struct vcpu *v)
> {
> - if ( cpu_has_xsave )
> - {
> - ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
> - ASSERT(v->arch.xsave_area);
> - }
> + if ( !cpu_has_xsave )
> + return 0;
>
> - return cpu_has_xsave;
> + ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
> + ASSERT(v->arch.xsave_area);
> +
> + return !!v->arch.xcr0_accum;
> }
>
> int xstate_alloc_save_area(struct vcpu *v)
> @@ -238,8 +238,8 @@ int xstate_alloc_save_area(struct vcpu *
> save_area->fpu_sse.mxcsr = MXCSR_DEFAULT;
>
> v->arch.xsave_area = save_area;
> - v->arch.xcr0 = XSTATE_FP_SSE;
> - v->arch.xcr0_accum = XSTATE_FP_SSE;
> + v->arch.xcr0 = 0;
> + v->arch.xcr0_accum = 0;
>
> return 0;
> }
> @@ -257,7 +257,11 @@ void xstate_init(bool_t bsp)
> u64 feature_mask;
>
> if ( boot_cpu_data.cpuid_level < XSTATE_CPUID )
> + {
> + BUG_ON(!bsp);
> + setup_clear_cpu_cap(X86_FEATURE_XSAVE);
> return;
> + }
>
> cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
>
> @@ -277,7 +281,6 @@ void xstate_init(bool_t bsp)
> set_in_cr4(X86_CR4_OSXSAVE);
> if ( !set_xcr0(feature_mask) )
> BUG();
> - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
>
> if ( bsp )
> {
> @@ -286,14 +289,14 @@ void xstate_init(bool_t bsp)
> * xsave_cntxt_size is the max size required by enabled features.
> * We know FP/SSE and YMM about eax, and nothing about edx at present.
> */
> - xsave_cntxt_size = ebx;
> + xsave_cntxt_size = xstate_ctxt_size(feature_mask);
> printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n",
> __func__, xsave_cntxt_size, xfeature_mask);
> }
> else
> {
> BUG_ON(xfeature_mask != feature_mask);
> - BUG_ON(xsave_cntxt_size != ebx);
> + BUG_ON(xsave_cntxt_size != xstate_ctxt_size(feature_mask));
> }
>
> /* Check XSAVEOPT feature. */
> @@ -304,6 +307,42 @@ void xstate_init(bool_t bsp)
> BUG_ON(!cpu_has_xsaveopt != !(eax & XSTATE_FEATURE_XSAVEOPT));
> }
>
> +unsigned int xstate_ctxt_size(u64 xcr0)
> +{
> + u32 ebx = 0;
> +
> + if ( xcr0 )
> + {
> + u64 act_xcr0 = get_xcr0();
> + u32 eax, ecx, edx;
> + bool_t ok = set_xcr0(xcr0);
> +
> + ASSERT(ok);
> + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
> + ASSERT(ebx <= ecx);
> + ok = set_xcr0(act_xcr0);
> + ASSERT(ok);
> + }
> +
> + return ebx;
> +}
> +
> +int validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv, u64 xfeat_mask)
> +{
> + if ( (xcr0_accum & ~xfeat_mask) ||
> + (xstate_bv & ~xcr0_accum) ||
> + (xcr0 & ~xcr0_accum) ||
> + !(xcr0 & XSTATE_FP) ||
> + ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) ||
> + ((xcr0_accum & XSTATE_YMM) && !(xcr0_accum & XSTATE_SSE)) )
> + return -EINVAL;
> +
> + if ( xcr0_accum & ~xfeature_mask )
> + return -EOPNOTSUPP;
> +
> + return 0;
> +}
> +
> int handle_xsetbv(u32 index, u64 new_bv)
> {
> struct vcpu *curr = current;
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -456,9 +456,9 @@ unsigned long pv_guest_cr4_fixup(const s
> #define pv_guest_cr4_to_real_cr4(v) \
> (((v)->arch.pv_vcpu.ctrlreg[4] \
> | (mmu_cr4_features \
> - & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP)) \
> - | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0) \
> - | ((xsave_enabled(v))? X86_CR4_OSXSAVE : 0)) \
> + & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP | \
> + X86_CR4_OSXSAVE)) \
> + | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)) \
> & ~X86_CR4_DE)
> #define real_cr4_to_pv_guest_cr4(c) \
> ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD \
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -368,7 +368,7 @@ static inline int hvm_event_pending(stru
> ((nestedhvm_enabled((_v)->domain) && cpu_has_vmx)\
> ? X86_CR4_VMXE : 0) | \
> (cpu_has_pcid ? X86_CR4_PCIDE : 0) | \
> - (xsave_enabled(_v) ? X86_CR4_OSXSAVE : 0))))
> + (cpu_has_xsave ? X86_CR4_OSXSAVE : 0))))
>
> /* These exceptions must always be intercepted. */
> #define HVM_TRAP_MASK ((1U << TRAP_machine_check) | (1U << TRAP_invalid_op))
> --- a/xen/include/asm-x86/xstate.h
> +++ b/xen/include/asm-x86/xstate.h
> @@ -34,7 +34,6 @@
> #define XSTATE_NONLAZY (XSTATE_LWP)
> #define XSTATE_LAZY (XSTATE_ALL & ~XSTATE_NONLAZY)
>
> -extern unsigned int xsave_cntxt_size;
> extern u64 xfeature_mask;
>
> /* extended state save area */
> @@ -77,11 +76,14 @@ uint64_t get_xcr0(void);
> void xsave(struct vcpu *v, uint64_t mask);
> void xrstor(struct vcpu *v, uint64_t mask);
> bool_t xsave_enabled(const struct vcpu *v);
> +int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv,
> + u64 xfeat_mask);
> int __must_check handle_xsetbv(u32 index, u64 new_bv);
>
> /* extended state init and cleanup functions */
> void xstate_free_save_area(struct vcpu *v);
> int xstate_alloc_save_area(struct vcpu *v);
> void xstate_init(bool_t bsp);
> +unsigned int xstate_ctxt_size(u64 xcr0);
>
> #endif /* __ASM_XSTATE_H */
>
>