
Re: [Xen-devel] [PATCH] x86/xsave: fix migration from xsave-capable to xsave-incapable host



Jan Beulich wrote on 2013-08-30:
> With CPUID features suitably masked this is supposed to work, but was
> completely broken (i.e. the case wasn't even considered when the
> original xsave save/restore code was written).
> 
> First of all, xsave_enabled() wrongly returned the value of
> cpu_has_xsave, i.e. not even taking into consideration attributes of
> the vCPU in question. Instead this function ought to check whether the
> guest ever enabled xsave support (by writing a [non-zero] value to
> XCR0). As a result of this, a vCPU's xcr0 and xcr0_accum must no longer
> be initialized to XSTATE_FP_SSE (since that's a valid value a guest
> could write to XCR0), and the xsave/xrstor as well as the context
> switch code need to suitably account for this (by always enforcing at
> least this part of the state to be saved/loaded).
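
To restate the new semantics in isolation (this is only an illustrative
stand-alone sketch with simplified stand-in names and types, not the patch
code itself, which follows below):

/* Stand-alone illustration only; simplified stand-ins, not Xen's types. */
#include <stdbool.h>
#include <stdint.h>

#define XSTATE_FP      (1ULL << 0)
#define XSTATE_SSE     (1ULL << 1)
#define XSTATE_FP_SSE  (XSTATE_FP | XSTATE_SSE)

struct vcpu_xstate {
    uint64_t xcr0;        /* last value the guest wrote to XCR0 */
    uint64_t xcr0_accum;  /* union of everything it ever enabled */
};

static bool cpu_has_xsave = true;   /* host capability (stand-in flag) */

/* "Did the guest ever enable xsave?", not "does the host support it?". */
static bool guest_used_xsave(const struct vcpu_xstate *v)
{
    return cpu_has_xsave && v->xcr0_accum != 0;
}

/* On context switch the FP/SSE portion must always be loadable, even for
 * a guest that never touched XCR0, hence the fallback value. */
static uint64_t effective_xcr0(const struct vcpu_xstate *v)
{
    return v->xcr0 ? v->xcr0 : XSTATE_FP_SSE;
}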
> 
> This involves undoing large parts of c/s 22945:13a7d1f7f62c ("x86: add
> strictly sanity check for XSAVE/XRSTOR") - we need to cleanly
> distinguish between hardware capabilities and vCPU used features.
> 
> Next both HVM and PV save code needed tweaking to not always save the
> full state supported by the underlying hardware, but just the parts
> that the guest actually used. Similarly the restore code should bail
> not just on state being restored that the hardware cannot handle, but
> also on inconsistent save state (inconsistent XCR0 settings or size of
> saved state not in line with XCR0).
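
In other words, the save size is keyed off xcr0_accum rather than the host
maximum, and restore rejects both internally inconsistent headers and
features the target host lacks. A rough stand-alone sketch of those checks
(simplified names; the real version is validate_xstate() further down):

/* Stand-alone sketch of the restore-side consistency checks; the function
 * name and constants are stand-ins for the real validate_xstate(). */
#include <errno.h>
#include <stdint.h>

#define XSTATE_FP   (1ULL << 0)
#define XSTATE_SSE  (1ULL << 1)
#define XSTATE_YMM  (1ULL << 2)

static int check_saved_xstate(uint64_t xcr0, uint64_t xcr0_accum,
                              uint64_t xstate_bv, uint64_t host_mask)
{
    /* The saved header must be internally consistent ... */
    if ( (xstate_bv & ~xcr0_accum) ||
         (xcr0 & ~xcr0_accum) ||
         !(xcr0 & XSTATE_FP) ||
         ((xcr0_accum & XSTATE_YMM) && !(xcr0_accum & XSTATE_SSE)) )
        return -EINVAL;

    /* ... and must not require features the receiving host lacks. */
    if ( xcr0_accum & ~host_mask )
        return -EOPNOTSUPP;

    return 0;
}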
> 
> And finally the PV extended context get/set code needs to use slightly
> different logic than the HVM one, as here we can't just key off of
> xsave_enabled() (i.e. avoid doing anything if a guest doesn't use
> xsave) because the tools use this function to determine host
> capabilities as well as read/write vCPU state. The set operation in
> particular needs to be capable of cleanly dealing with input that
> consists of only the xcr0 and xcr0_accum values (if they're both zero
> then no further data is required).
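
So the minimal valid input for the set operation is just the two header
words. A rough stand-alone sketch of that sizing rule (the names and the
minimum-area constant are stand-ins; the real logic is in the
arch_do_domctl() hunk below):

/* Stand-alone sketch of the PV set-extended-state input layout; names and
 * the minimum-area constant are stand-ins, not Xen's definitions. */
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define HDR_BYTES       (2 * sizeof(uint64_t)) /* xcr0 + xcr0_accum      */
#define MIN_XSAVE_BYTES 576                    /* legacy region + header */

static int pv_set_ext_state(const void *buf, size_t size, size_t max_xsave)
{
    uint64_t xcr0, xcr0_accum;

    if ( size < HDR_BYTES || size > HDR_BYTES + max_xsave )
        return -EINVAL;

    memcpy(&xcr0, buf, sizeof(xcr0));
    memcpy(&xcr0_accum, (const char *)buf + sizeof(xcr0), sizeof(xcr0_accum));

    if ( !xcr0 && !xcr0_accum )
        return 0;   /* guest never used xsave: the two words alone suffice */

    if ( size < HDR_BYTES + MIN_XSAVE_BYTES )
        return -EINVAL;   /* otherwise a real XSAVE area must follow */

    /* (consistency checks and the actual copy would follow here) */
    return 0;
}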
> 
> While for things to work correctly both sides (saving _and_ restoring
> host) need to run with the fixed code, afaict no breakage should occur
> if either side isn't up to date (other than the breakage that this
> patch attempts to fix).
It looks good, but the title confused me before I saw your reply.

Reviewed-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>

> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> Please note that the development and testing of this was done on 4.1.x,
> as that's where the issue was found. The xsave code being quite
> different between then and now, the porting was not really
> straightforward, but in the end the biggest difference was that the
> 4.1.x code needs more changes than presented here, due to the FPU
> context and XSAVE area there not sharing the same memory block. Hence I
> think/hope I did all the porting over correctly, but I had no setup
> available where I could have fully tested all the scenarios myself.
> 
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -618,7 +618,7 @@ unsigned long pv_guest_cr4_fixup(const s
>          hv_cr4_mask &= ~X86_CR4_DE;
>      if ( cpu_has_fsgsbase && !is_pv_32bit_domain(v->domain) )
>          hv_cr4_mask &= ~X86_CR4_FSGSBASE;
> -    if ( xsave_enabled(v) )
> +    if ( cpu_has_xsave )
>          hv_cr4_mask &= ~X86_CR4_OSXSAVE;
> 
>      if ( (guest_cr4 & hv_cr4_mask) != (hv_cr4 & hv_cr4_mask) )
> @@ -1351,9 +1351,13 @@ static void __context_switch(void)
>      if ( !is_idle_vcpu(n) )
>      {
>          memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
> -        if ( xsave_enabled(n) && n->arch.xcr0 != get_xcr0() &&
> -             !set_xcr0(n->arch.xcr0) )
> -            BUG();
> +        if ( cpu_has_xsave )
> +        {
> +            u64 xcr0 = n->arch.xcr0 ?: XSTATE_FP_SSE;
> +
> +            if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
> +                BUG();
> +        }
>          vcpu_restore_fpu_eager(n);
>          n->arch.ctxt_switch_to(n);
>      }
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -1047,11 +1047,8 @@ long arch_do_domctl(
>          struct xen_domctl_vcpuextstate *evc;
>          struct vcpu *v;
>          uint32_t offset = 0;
> -        uint64_t _xfeature_mask = 0;
> -        uint64_t _xcr0, _xcr0_accum;
> -        void *receive_buf = NULL, *_xsave_area;
> 
> -#define PV_XSAVE_SIZE (2 * sizeof(uint64_t) + xsave_cntxt_size)
> +#define PV_XSAVE_SIZE(xcr0) (2 * sizeof(uint64_t) + xstate_ctxt_size(xcr0))
> 
>          evc = &domctl->u.vcpuextstate;
> @@ -1062,15 +1059,16 @@ long arch_do_domctl(
> 
>          if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate )
>          {
> +            unsigned int size = PV_XSAVE_SIZE(v->arch.xcr0_accum);
> +
>              if ( !evc->size && !evc->xfeature_mask )
>              {
>                  evc->xfeature_mask = xfeature_mask;
> -                evc->size = PV_XSAVE_SIZE;
> +                evc->size = size;
>                  ret = 0;
>                  goto vcpuextstate_out;
>              }
> -            if ( evc->size != PV_XSAVE_SIZE ||
> -                 evc->xfeature_mask != xfeature_mask )
> +            if ( evc->size != size || evc->xfeature_mask != xfeature_mask )
>              {
>                  ret = -EINVAL;
>                  goto vcpuextstate_out;
> @@ -1093,7 +1091,7 @@ long arch_do_domctl(
>              offset += sizeof(v->arch.xcr0_accum);
>              if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer,
>                                        offset, (void *)v->arch.xsave_area,
> -                                      xsave_cntxt_size) )
> +                                      size - 2 * sizeof(uint64_t)) )
>              {
>                  ret = -EFAULT;
>                  goto vcpuextstate_out;
> @@ -1101,13 +1099,14 @@ long arch_do_domctl(
>          }
>          else
>          {
> -            ret = -EINVAL;
> +            void *receive_buf;
> +            uint64_t _xcr0, _xcr0_accum;
> +            const struct xsave_struct *_xsave_area;
> 
> -            _xfeature_mask = evc->xfeature_mask;
> -            /* xsave context must be restored on compatible target CPUs */
> -            if ( (_xfeature_mask & xfeature_mask) != _xfeature_mask )
> -                goto vcpuextstate_out;
> -            if ( evc->size > PV_XSAVE_SIZE || evc->size < 2 * sizeof(uint64_t) )
> +            ret = -EINVAL;
> +            if ( evc->size < 2 * sizeof(uint64_t) ||
> +                 evc->size > 2 * sizeof(uint64_t) +
> +                             xstate_ctxt_size(xfeature_mask) )
>                  goto vcpuextstate_out;
>              receive_buf = xmalloc_bytes(evc->size);
> @@ -1128,20 +1127,30 @@ long arch_do_domctl(
>              _xcr0_accum = *(uint64_t *)(receive_buf + sizeof(uint64_t));
>              _xsave_area = receive_buf + 2 * sizeof(uint64_t);
> -            if ( !(_xcr0 & XSTATE_FP) || _xcr0 & ~xfeature_mask )
> +            if ( _xcr0_accum )
>              {
> -                xfree(receive_buf);
> -                goto vcpuextstate_out;
> +                if ( evc->size >= 2 * sizeof(uint64_t) + XSTATE_AREA_MIN_SIZE )
> +                    ret = validate_xstate(_xcr0, _xcr0_accum,
> +                                          _xsave_area->xsave_hdr.xstate_bv,
> +                                          evc->xfeature_mask);
>              }
> -            if ( (_xcr0 & _xcr0_accum) != _xcr0 )
> +            else if ( !_xcr0 )
> +                ret = 0;
> +            if ( ret )
>              {
>                  xfree(receive_buf);
>                  goto vcpuextstate_out;
>              }
> -            v->arch.xcr0 = _xcr0;
> -            v->arch.xcr0_accum = _xcr0_accum;
> -            memcpy(v->arch.xsave_area, _xsave_area, evc->size - 2 * sizeof(uint64_t) );
> +            if ( evc->size <= PV_XSAVE_SIZE(_xcr0_accum) )
> +            {
> +                v->arch.xcr0 = _xcr0;
> +                v->arch.xcr0_accum = _xcr0_accum;
> +                memcpy(v->arch.xsave_area, _xsave_area,
> +                       evc->size - 2 * sizeof(uint64_t));
> +            }
> +            else
> +                ret = -EINVAL;
> 
>              xfree(receive_buf);
>          }
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -906,14 +906,12 @@ static int hvm_load_cpu_ctxt(struct doma
>      hvm_set_segment_register(v, x86_seg_ldtr, &seg);
>      
>      /* In case xsave-absent save file is restored on a xsave-capable host */
> -    if ( xsave_enabled(v) )
> +    if ( cpu_has_xsave && !xsave_enabled(v) )
>      {
>          struct xsave_struct *xsave_area = v->arch.xsave_area;
>          
>          memcpy(v->arch.xsave_area, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
>          xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE;
> -        v->arch.xcr0_accum = XSTATE_FP_SSE;
> -        v->arch.xcr0 = XSTATE_FP_SSE;
>      }
>      else
>          memcpy(v->arch.fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs));
> @@ -957,7 +955,9 @@ static int hvm_load_cpu_ctxt(struct doma
>  HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
>                            1, HVMSR_PER_VCPU);
> -#define HVM_CPU_XSAVE_SIZE  (3 * sizeof(uint64_t) + xsave_cntxt_size)
> +#define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \
> +                                           save_area) + \
> +                                  xstate_ctxt_size(xcr0))
> 
>  static int hvm_save_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
>  {
> @@ -969,20 +969,20 @@ static int hvm_save_cpu_xsave_states(str
> 
>      for_each_vcpu ( d, v )
>      {
> +        unsigned int size = HVM_CPU_XSAVE_SIZE(v->arch.xcr0_accum);
> +
>          if ( !xsave_enabled(v) )
>              continue;
> -        if ( _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, HVM_CPU_XSAVE_SIZE) )
> +        if ( _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, size) )
>              return 1;
>          ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur];
> -        h->cur += HVM_CPU_XSAVE_SIZE;
> -        memset(ctxt, 0, HVM_CPU_XSAVE_SIZE);
> +        h->cur += size;
> 
>          ctxt->xfeature_mask = xfeature_mask;
>          ctxt->xcr0 = v->arch.xcr0;
>          ctxt->xcr0_accum = v->arch.xcr0_accum;
> -        if ( v->fpu_initialised )
> -            memcpy(&ctxt->save_area,
> -                v->arch.xsave_area, xsave_cntxt_size);
> +        memcpy(&ctxt->save_area, v->arch.xsave_area,
> +               size - offsetof(struct hvm_hw_cpu_xsave, save_area));
>      }
>      
>      return 0;
> @@ -990,11 +990,11 @@ static int hvm_save_cpu_xsave_states(str
> 
>  static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
>  {
> -    int vcpuid;
> +    unsigned int vcpuid, size;
> +    int err;
>      struct vcpu *v;
>      struct hvm_hw_cpu_xsave *ctxt;
>      struct hvm_save_descriptor *desc;
> -    uint64_t _xfeature_mask;
> 
>      /* Which vcpu is this? */
>      vcpuid = hvm_load_instance(h);
> @@ -1006,47 +1006,74 @@ static int hvm_load_cpu_xsave_states(str
>      }
>      
>      /* Fails since we can't restore an img saved on xsave-capable host. */
> -    if ( !xsave_enabled(v) )
> -        return -EINVAL;
> +    if ( !cpu_has_xsave )
> +        return -EOPNOTSUPP;
> 
>      /* Customized checking for entry since our entry is of variable length */
>      desc = (struct hvm_save_descriptor *)&h->data[h->cur];
>      if ( sizeof (*desc) > h->size - h->cur)
>      {
>          printk(XENLOG_G_WARNING
> -               "HVM%d restore: not enough data left to read descriptor"
> -               "for type %u\n", d->domain_id, CPU_XSAVE_CODE); -       
> return -1; +               "HVM%d.%d restore: not enough data left to
> read xsave descriptor\n", +               d->domain_id, vcpuid); +      
>  return -ENODATA;
>      }
>      if ( desc->length + sizeof (*desc) > h->size - h->cur)
>      {
>          printk(XENLOG_G_WARNING
> -               "HVM%d restore: not enough data left to read %u bytes "
> -               "for type %u\n", d->domain_id, desc->length,
> CPU_XSAVE_CODE); -        return -1; +               "HVM%d.%d restore:
> not enough data left to read %u xsave bytes\n", +              
> d->domain_id, vcpuid, desc->length); +        return -ENODATA; +    } + 
>   if ( desc->length < offsetof(struct hvm_hw_cpu_xsave, save_area) + +  
>                      XSTATE_AREA_MIN_SIZE ) +    { +       
> printk(XENLOG_G_WARNING +               "HVM%d.%d restore mismatch:
> xsave length %u < %zu\n", +               d->domain_id, vcpuid,
> desc->length, +               offsetof(struct hvm_hw_cpu_xsave, +       
>                 save_area) + XSTATE_AREA_MIN_SIZE); +        return
> -EINVAL;
>      }
> -    if ( CPU_XSAVE_CODE != desc->typecode || (desc->length > HVM_CPU_XSAVE_SIZE) )
> +    size = HVM_CPU_XSAVE_SIZE(xfeature_mask);
> +    if ( desc->length > size )
>      {
>          printk(XENLOG_G_WARNING
> -               "HVM%d restore mismatch: expected type %u with max
> length %u, " -               "saw type %u length %u\n", d->domain_id,
> CPU_XSAVE_CODE, -               (unsigned int)HVM_CPU_XSAVE_SIZE, -     
>          desc->typecode, desc->length); -        return -1; +           
>    "HVM%d.%d restore mismatch: xsave length %u > %u\n", +              
> d->domain_id, vcpuid, desc->length, size); +        return -EOPNOTSUPP;
>      }
>      h->cur += sizeof (*desc);
> -    /* Checking finished */
> 
>      ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur];
>      h->cur += desc->length;
> -    _xfeature_mask = ctxt->xfeature_mask;
> -    if ( (_xfeature_mask & xfeature_mask) != _xfeature_mask )
> -        return -EINVAL;
> +    err = validate_xstate(ctxt->xcr0, ctxt->xcr0_accum,
> +                          ctxt->save_area.xsave_hdr.xstate_bv,
> +                          ctxt->xfeature_mask);
> +    if ( err )
> +    {
> +        printk(XENLOG_G_WARNING
> +               "HVM%d.%d restore: inconsistent xsave state (feat=%#"PRIx64
> +               " accum=%#"PRIx64" xcr0=%#"PRIx64" bv=%#"PRIx64" err=%d)\n",
> +               d->domain_id, vcpuid, ctxt->xfeature_mask, ctxt->xcr0_accum,
> +               ctxt->xcr0, ctxt->save_area.xsave_hdr.xstate_bv, err);
> +        return err;
> +    }
> +    size = HVM_CPU_XSAVE_SIZE(ctxt->xcr0_accum);
> +    if ( desc->length > size )
> +    {
> +        printk(XENLOG_G_WARNING
> +               "HVM%d.%d restore mismatch: xsave length %u > %u\n",
> +               d->domain_id, vcpuid, desc->length, size);
> +        return -EOPNOTSUPP;
> +    }
> +    /* Checking finished */
> 
>      v->arch.xcr0 = ctxt->xcr0;
>      v->arch.xcr0_accum = ctxt->xcr0_accum;
> -    memcpy(v->arch.xsave_area, &ctxt->save_area, xsave_cntxt_size);
> +    memcpy(v->arch.xsave_area, &ctxt->save_area,
> +           desc->length - offsetof(struct hvm_hw_cpu_xsave, save_area));
> 
>      return 0;
>  }
> @@ -1060,7 +1087,8 @@ static int __init __hvm_register_CPU_XSA
>                          "CPU_XSAVE",
>                          hvm_save_cpu_xsave_states,
>                          hvm_load_cpu_xsave_states,
> -                        HVM_CPU_XSAVE_SIZE + sizeof (struct hvm_save_descriptor),
> +                        HVM_CPU_XSAVE_SIZE(xfeature_mask) +
> +                            sizeof(struct hvm_save_descriptor),
>                          HVMSR_PER_VCPU);
>      return 0;
>  }
> @@ -2767,7 +2795,7 @@ void hvm_cpuid(unsigned int input, unsig
>              __clear_bit(X86_FEATURE_APIC & 31, edx);
>          /* Fix up OSXSAVE. */
> -        if ( xsave_enabled(v) )
> +        if ( cpu_has_xsave )
>              *ecx |= (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE) ?
>                       cpufeat_mask(X86_FEATURE_OSXSAVE) : 0;
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -944,8 +944,7 @@ static int construct_vmcs(struct vcpu *v
>      /* Host control registers. */
>      v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
>      __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
> -    __vmwrite(HOST_CR4,
> -              mmu_cr4_features | (xsave_enabled(v) ? X86_CR4_OSXSAVE : 0));
> +    __vmwrite(HOST_CR4, mmu_cr4_features);
> 
>      /* Host CS:RIP. */
>      __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
> --- a/xen/arch/x86/i387.c
> +++ b/xen/arch/x86/i387.c
> @@ -38,14 +38,15 @@ static inline void fpu_xrstor(struct vcp
>  {
>      bool_t ok;
> +    ASSERT(v->arch.xsave_area);
>      /*
>       * XCR0 normally represents what guest OS set. In case of Xen itself,
> -     * we set all supported feature mask before doing save/restore.
> +     * we set the accumulated feature mask before doing save/restore.
>       */
> -    ok = set_xcr0(v->arch.xcr0_accum);
> +    ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
>      ASSERT(ok);
>      xrstor(v, mask);
> -    ok = set_xcr0(v->arch.xcr0);
> +    ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
>      ASSERT(ok);
>  }
> @@ -137,13 +138,15 @@ static inline void fpu_xsave(struct vcpu
>  {
>      bool_t ok;
> -    /* XCR0 normally represents what guest OS set. In case of Xen itself,
> -     * we set all accumulated feature mask before doing save/restore.
> +    ASSERT(v->arch.xsave_area);
> +    /*
> +     * XCR0 normally represents what guest OS set. In case of Xen itself,
> +     * we set the accumulated feature mask before doing save/restore.
>       */
> -    ok = set_xcr0(v->arch.xcr0_accum);
> +    ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
>      ASSERT(ok);
>      xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY);
> -    ok = set_xcr0(v->arch.xcr0);
> +    ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
>      ASSERT(ok);
>  }
> @@ -232,7 +235,7 @@ void vcpu_restore_fpu_lazy(struct vcpu *
>      if ( v->fpu_dirtied )
>          return;
> -    if ( xsave_enabled(v) )
> +    if ( cpu_has_xsave )
>          fpu_xrstor(v, XSTATE_LAZY);
>      else if ( v->fpu_initialised )
>      {
> @@ -262,7 +265,7 @@ void vcpu_save_fpu(struct vcpu *v)
>      /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
>      clts();
> -    if ( xsave_enabled(v) )
> +    if ( cpu_has_xsave )
>          fpu_xsave(v);
>      else if ( cpu_has_fxsr )
>          fpu_fxsave(v);
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -817,7 +817,7 @@ static void pv_cpuid(struct cpu_user_reg
>          __clear_bit(X86_FEATURE_PDCM % 32, &c);
>          __clear_bit(X86_FEATURE_PCID % 32, &c);
>          __clear_bit(X86_FEATURE_DCA % 32, &c);
> -        if ( !xsave_enabled(current) )
> +        if ( !cpu_has_xsave )
>          {
>              __clear_bit(X86_FEATURE_XSAVE % 32, &c);
>              __clear_bit(X86_FEATURE_AVX % 32, &c);
> @@ -842,7 +842,7 @@ static void pv_cpuid(struct cpu_user_reg
>          break;
>      case 0x0000000d: /* XSAVE */
> -        if ( !xsave_enabled(current) )
> +        if ( !cpu_has_xsave )
>              goto unsupported;
>          break;
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -21,7 +21,7 @@ bool_t __read_mostly cpu_has_xsaveopt;
>   * the supported and enabled features on the processor, including the
>   * XSAVE.HEADER. We only enable XCNTXT_MASK that we have known.
>   */
> -u32 xsave_cntxt_size;
> +static u32 __read_mostly xsave_cntxt_size;
> 
>  /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
>  u64 xfeature_mask;
> @@ -206,13 +206,13 @@ void xrstor(struct vcpu *v, uint64_t mas
> 
>  bool_t xsave_enabled(const struct vcpu *v)
>  {
> -    if ( cpu_has_xsave )
> -    {
> -        ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
> -        ASSERT(v->arch.xsave_area);
> -    }
> +    if ( !cpu_has_xsave )
> +        return 0;
> 
> -    return cpu_has_xsave;
> +    ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
> +    ASSERT(v->arch.xsave_area);
> +
> +    return !!v->arch.xcr0_accum;
>  }
>  
>  int xstate_alloc_save_area(struct vcpu *v)
> @@ -238,8 +238,8 @@ int xstate_alloc_save_area(struct vcpu *
>      save_area->fpu_sse.mxcsr = MXCSR_DEFAULT;
>      
>      v->arch.xsave_area = save_area;
> -    v->arch.xcr0 = XSTATE_FP_SSE;
> -    v->arch.xcr0_accum = XSTATE_FP_SSE;
> +    v->arch.xcr0 = 0;
> +    v->arch.xcr0_accum = 0;
> 
>      return 0;
>  }
> @@ -257,7 +257,11 @@ void xstate_init(bool_t bsp)
>      u64 feature_mask;
>      
>      if ( boot_cpu_data.cpuid_level < XSTATE_CPUID )
> +    {
> +        BUG_ON(!bsp);
> +        setup_clear_cpu_cap(X86_FEATURE_XSAVE);
>          return;
> +    }
> 
>      cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
> @@ -277,7 +281,6 @@ void xstate_init(bool_t bsp)
>      set_in_cr4(X86_CR4_OSXSAVE);
>      if ( !set_xcr0(feature_mask) )
>          BUG();
> -    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
> 
>      if ( bsp )
>      {
> @@ -286,14 +289,14 @@ void xstate_init(bool_t bsp)
>           * xsave_cntxt_size is the max size required by enabled features.
>           * We know FP/SSE and YMM about eax, and nothing about edx at present.
>           */
> -        xsave_cntxt_size = ebx;
> +        xsave_cntxt_size = xstate_ctxt_size(feature_mask);
>          printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n",
>              __func__, xsave_cntxt_size, xfeature_mask);
>      }
>      else
>      {
>          BUG_ON(xfeature_mask != feature_mask);
> -        BUG_ON(xsave_cntxt_size != ebx);
> +        BUG_ON(xsave_cntxt_size != xstate_ctxt_size(feature_mask));
>      }
>      
>      /* Check XSAVEOPT feature. */
> @@ -304,6 +307,42 @@ void xstate_init(bool_t bsp)
>          BUG_ON(!cpu_has_xsaveopt != !(eax & XSTATE_FEATURE_XSAVEOPT));
>  }
> +unsigned int xstate_ctxt_size(u64 xcr0)
> +{
> +    u32 ebx = 0;
> +
> +    if ( xcr0 )
> +    {
> +        u64 act_xcr0 = get_xcr0();
> +        u32 eax, ecx, edx;
> +        bool_t ok = set_xcr0(xcr0);
> +
> +        ASSERT(ok);
> +        cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
> +        ASSERT(ebx <= ecx);
> +        ok = set_xcr0(act_xcr0);
> +        ASSERT(ok);
> +    }
> +
> +    return ebx;
> +}
> +
> +int validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv, u64 xfeat_mask)
> +{
> +    if ( (xcr0_accum & ~xfeat_mask) ||
> +         (xstate_bv & ~xcr0_accum) ||
> +         (xcr0 & ~xcr0_accum) ||
> +         !(xcr0 & XSTATE_FP) ||
> +         ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) ||
> +         ((xcr0_accum & XSTATE_YMM) && !(xcr0_accum & XSTATE_SSE)) )
> +        return -EINVAL;
> +
> +    if ( xcr0_accum & ~xfeature_mask )
> +        return -EOPNOTSUPP;
> +
> +    return 0;
> +}
> +
>  int handle_xsetbv(u32 index, u64 new_bv)
>  {
>      struct vcpu *curr = current;
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -456,9 +456,9 @@ unsigned long pv_guest_cr4_fixup(const s
>  #define pv_guest_cr4_to_real_cr4(v)                         \
>      (((v)->arch.pv_vcpu.ctrlreg[4]                          \
>        | (mmu_cr4_features                                   \
> -         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP))      \
> -      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)          \
> -      | ((xsave_enabled(v))? X86_CR4_OSXSAVE : 0))          \
> +         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |      \
> +            X86_CR4_OSXSAVE))                               \
> +      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
>       & ~X86_CR4_DE)
>  #define real_cr4_to_pv_guest_cr4(c)                         \
>      ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD        \
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -368,7 +368,7 @@ static inline int hvm_event_pending(stru
>          ((nestedhvm_enabled((_v)->domain) && cpu_has_vmx)\
>                        ? X86_CR4_VMXE : 0)  |             \
>          (cpu_has_pcid ? X86_CR4_PCIDE : 0) |             \
> -        (xsave_enabled(_v) ? X86_CR4_OSXSAVE : 0))))
> +        (cpu_has_xsave ? X86_CR4_OSXSAVE : 0))))
> 
>  /* These exceptions must always be intercepted. */
>  #define HVM_TRAP_MASK ((1U << TRAP_machine_check) | (1U << TRAP_invalid_op))
> --- a/xen/include/asm-x86/xstate.h
> +++ b/xen/include/asm-x86/xstate.h
> @@ -34,7 +34,6 @@
>  #define XSTATE_NONLAZY (XSTATE_LWP)
>  #define XSTATE_LAZY    (XSTATE_ALL & ~XSTATE_NONLAZY)
> -extern unsigned int xsave_cntxt_size;
>  extern u64 xfeature_mask;
>  
>  /* extended state save area */
> @@ -77,11 +76,14 @@ uint64_t get_xcr0(void);
>  void xsave(struct vcpu *v, uint64_t mask);
>  void xrstor(struct vcpu *v, uint64_t mask);
>  bool_t xsave_enabled(const struct vcpu *v);
> +int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv,
> +                                 u64 xfeat_mask);
>  int __must_check handle_xsetbv(u32 index, u64 new_bv);
>  
>  /* extended state init and cleanup functions */
>  void xstate_free_save_area(struct vcpu *v);
>  int xstate_alloc_save_area(struct vcpu *v);
>  void xstate_init(bool_t bsp);
> +unsigned int xstate_ctxt_size(u64 xcr0);
> 
>  #endif /* __ASM_XSTATE_H */
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel


Best regards,
Yang


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

