Re: [Xen-devel] [PATCH v5] x86/AMD: Add support for AMD's OSVW feature in guests



On 07/02/2012 11:51, "Jan Beulich" <JBeulich@xxxxxxxx> wrote:

>>>> On 06.02.12 at 18:39, Boris Ostrovsky <boris.ostrovsky@xxxxxxx> wrote:
>> # HG changeset patch
>> # User Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
>> # Date 1328549858 -3600
>> # Node ID 3cf8ffd0ab883dd09f943f4d8fb50f5cc1f04cd5
>> # Parent  e2722b24dc0962de37215320b05d1bb7c4c42864
>> x86/AMD: Add support for AMD's OSVW feature in guests.
>> 
>> In some cases guests should not provide workarounds for errata even when the
>> physical processor is affected. For example, because of erratum 400 on family
>> 10h processors a Linux guest will read an MSR (resulting in a VMEXIT) before
>> going idle in order to avoid getting stuck in a non-C0 state. This is not
>> necessary: HLT and IO instructions are intercepted, and therefore there is no
>> reason for the erratum 400 workaround in the guest.
>> 
>> This patch allows us to present a guest with certain errata as fixed,
>> regardless of the state of actual hardware.
>> 
>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
>> Acked-by: Christoph Egger <Christoph.Egger@xxxxxxx>
> 
> In the form below/attached (integration with boot time microcode
> loading fixed and trailing white space removed)
> 
> Acked-by: Jan Beulich <jbeulich@xxxxxxxx>

There's one vpcu typo in a comment. Also the trylock while loops would
preferably have braces; see the sketch below. Apart from that...

Acked-by: Keir Fraser <keir@xxxxxxx>
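
For concreteness, the braced form of the do_platform_op() loop from the
hunk below would be (a sketch only; the domctl instance would change the
same way):

    while ( !spin_trylock(&vcpu_alloc_lock) )
    {
        if ( hypercall_preempt_check() )
        {
            ret = hypercall_create_continuation(
                __HYPERVISOR_platform_op, "h", u_xenpf_op);
            goto out;
        }
    }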

> -- Jan
> 
> In some cases guests should not provide workarounds for errata even when the
> physical processor is affected. For example, because of erratum 400 on family
> 10h processors a Linux guest will read an MSR (resulting in a VMEXIT) before
> going idle in order to avoid getting stuck in a non-C0 state. This is not
> necessary: HLT and IO instructions are intercepted, and therefore there is no
> reason for the erratum 400 workaround in the guest.
> 
> This patch allows us to present a guest with certain errata as fixed,
> regardless of the state of actual hardware.
> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx>
> Acked-by: Christoph Egger <Christoph.Egger@xxxxxxx>
> Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
> 
> --- a/tools/libxc/xc_cpuid_x86.c
> +++ b/tools/libxc/xc_cpuid_x86.c
> @@ -108,6 +108,7 @@ static void amd_xc_cpuid_policy(
>                      bitmaskof(X86_FEATURE_SSE4A) |
>                      bitmaskof(X86_FEATURE_MISALIGNSSE) |
>                      bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
> +                    bitmaskof(X86_FEATURE_OSVW) |
>                      bitmaskof(X86_FEATURE_XOP) |
>                      bitmaskof(X86_FEATURE_FMA4) |
>                      bitmaskof(X86_FEATURE_TBM) |
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -83,6 +83,10 @@ static DEFINE_PER_CPU_READ_MOSTLY(void *
>  
>  static bool_t amd_erratum383_found __read_mostly;
>  
> +/* OSVW bits */
> +static uint64_t osvw_length, osvw_status;
> +static DEFINE_SPINLOCK(osvw_lock);
> +
>  void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len)
>  {
>      struct vcpu *curr = current;
> @@ -902,6 +906,69 @@ static void svm_do_resume(struct vcpu *v
>      reset_stack_and_jump(svm_asm_do_resume);
>  }
>  
> +static void svm_guest_osvw_init(struct vcpu *vcpu)
> +{
> +    if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
> +        return;
> +
> +    /*
> +     * Guests should see errata 400 and 415 as fixed (assuming that
> +     * HLT and IO instructions are intercepted).
> +     */
> +    vcpu->arch.hvm_svm.osvw.length = (osvw_length >= 3) ? osvw_length : 3;
> +    vcpu->arch.hvm_svm.osvw.status = osvw_status & ~(6ULL);
> +
> +    /*
> +     * By increasing VCPU's osvw.length to 3 we are telling the guest that
> +     * all osvw.status bits inside that length, including bit 0 (which is
> +     * reserved for erratum 298), are valid. However, if host processor's
> +     * osvw_len is 0 then osvw_status[0] carries no information. We need to
> +     * be conservative here and therefore we tell the guest that erratum 298
> +     * is present (because we really don't know).
> +     */
> +    if ( osvw_length == 0 && boot_cpu_data.x86 == 0x10 )
> +        vcpu->arch.hvm_svm.osvw.status |= 1;
> +}
> +
> +void svm_host_osvw_reset(void)
> +{
> +    spin_lock(&osvw_lock);
> +
> +    osvw_length = 64; /* One register (MSRC001_0141) worth of errata */
> +    osvw_status = 0;
> +
> +    spin_unlock(&osvw_lock);
> +}
> +
> +void svm_host_osvw_init(void)
> +{
> +    spin_lock(&osvw_lock);
> +
> +    /*
> +     * Get OSVW bits. If bits are not the same on different processors then
> +     * choose the worst case (i.e. if erratum is present on one processor and
> +     * not on another assume that the erratum is present everywhere).
> +     */
> +    if ( test_bit(X86_FEATURE_OSVW, &boot_cpu_data.x86_capability) )
> +    {
> +        uint64_t len, status;
> +
> +        if ( rdmsr_safe(MSR_AMD_OSVW_ID_LENGTH, len) ||
> +             rdmsr_safe(MSR_AMD_OSVW_STATUS, status) )
> +            len = status = 0;
> +
> +        if ( len < osvw_length )
> +            osvw_length = len;
> +
> +        osvw_status |= status;
> +        osvw_status &= (1ULL << osvw_length) - 1;
> +    }
> +    else
> +        osvw_length = osvw_status = 0;
> +
> +    spin_unlock(&osvw_lock);
> +}
> +
>  static int svm_domain_initialise(struct domain *d)
>  {
>      return 0;
> @@ -930,6 +997,9 @@ static int svm_vcpu_initialise(struct vc
>      }
>  
>      vpmu_initialise(v);
> +
> +    svm_guest_osvw_init(v);
> +
>      return 0;
>  }
>  
> @@ -1044,6 +1114,27 @@ static void svm_init_erratum_383(struct
>      }
>  }
>  
> +static int svm_handle_osvw(struct vcpu *v, uint32_t msr, uint64_t *val, bool_t read)
> +{
> +    uint eax, ebx, ecx, edx;
> +
> +    /* Guest OSVW support */
> +    hvm_cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
> +    if ( !test_bit((X86_FEATURE_OSVW & 31), &ecx) )
> +        return -1;
> +
> +    if ( read )
> +    {
> +        if ( msr == MSR_AMD_OSVW_ID_LENGTH )
> +            *val = v->arch.hvm_svm.osvw.length;
> +        else
> +            *val = v->arch.hvm_svm.osvw.status;
> +    }
> +    /* Writes are ignored */
> +
> +    return 0;
> +}
> +
>  static int svm_cpu_up(void)
>  {
>      uint64_t msr_content;
> @@ -1094,6 +1185,9 @@ static int svm_cpu_up(void)
>      }
>  #endif
>  
> +    /* Initialize OSVW bits to be used by guests */
> +    svm_host_osvw_init();
> +
>      return 0;
>  }
>  
> @@ -1104,6 +1198,8 @@ struct hvm_function_table * __init start
>      if ( !test_bit(X86_FEATURE_SVM, &boot_cpu_data.x86_capability) )
>          return NULL;
>  
> +    svm_host_osvw_reset();
> +
>      if ( svm_cpu_up() )
>      {
>          printk("SVM: failed to initialise.\n");
> @@ -1388,6 +1484,13 @@ static int svm_msr_read_intercept(unsign
>          vpmu_do_rdmsr(msr, msr_content);
>          break;
>  
> +    case MSR_AMD_OSVW_ID_LENGTH:
> +    case MSR_AMD_OSVW_STATUS:
> +        ret = svm_handle_osvw(v, msr, msr_content, 1);
> +        if ( ret < 0 )
> +            goto gpf;
> +        break;
> +
>      default:
>          ret = nsvm_rdmsr(v, msr, msr_content);
>          if ( ret < 0 )
> @@ -1512,6 +1615,13 @@ static int svm_msr_write_intercept(unsig
>           */
>          break;
>  
> +    case MSR_AMD_OSVW_ID_LENGTH:
> +    case MSR_AMD_OSVW_STATUS:
> +        ret = svm_handle_osvw(v, msr, &msr_content, 0);
> +        if ( ret < 0 )
> +            goto gpf;
> +        break;
> +
>      default:
>          ret = nsvm_wrmsr(v, msr, msr_content);
>          if ( ret < 0 )
> --- a/xen/arch/x86/microcode.c
> +++ b/xen/arch/x86/microcode.c
> @@ -218,6 +218,16 @@ int microcode_update(XEN_GUEST_HANDLE(co
>      info->error = 0;
>      info->cpu = cpumask_first(&cpu_online_map);
>  
> +    if ( microcode_ops->start_update )
> +    {
> +        ret = microcode_ops->start_update();
> +        if ( ret != 0 )
> +        {
> +            xfree(info);
> +            return ret;
> +        }
> +    }
> +
>      return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
>  }
>  
> @@ -240,6 +250,12 @@ static int __init microcode_init(void)
>      if ( !data )
>          return -ENOMEM;
>  
> +    if ( microcode_ops->start_update && microcode_ops->start_update() != 0 )
> +    {
> +        ucode_mod_map(NULL);
> +        return 0;
> +    }
> +
>      softirq_tasklet_init(&tasklet, _do_microcode_update, (unsigned long)data);
>  
>      for_each_online_cpu ( cpu )
> --- a/xen/arch/x86/microcode_amd.c
> +++ b/xen/arch/x86/microcode_amd.c
> @@ -25,6 +25,7 @@
>  #include <asm/msr.h>
>  #include <asm/processor.h>
>  #include <asm/microcode.h>
> +#include <asm/hvm/svm/svm.h>
>  
>  struct equiv_cpu_entry {
>      uint32_t installed_cpu;
> @@ -71,6 +72,7 @@ struct mpbhdr {
>  /* serialize access to the physical write */
>  static DEFINE_SPINLOCK(microcode_update_lock);
>  
> +/* See comment in start_update() for cases when this routine fails */
>  static int collect_cpu_info(int cpu, struct cpu_signature *csig)
>  {
>      struct cpuinfo_x86 *c = &cpu_data[cpu];
> @@ -287,7 +289,8 @@ static int cpu_request_microcode(int cpu
>      {
>          printk(KERN_ERR "microcode: error! Wrong "
>                 "microcode patch file magic\n");
> -        return -EINVAL;
> +        error = -EINVAL;
> +        goto out;
>      }
>  
>      mc_amd = xmalloc(struct microcode_amd);
> @@ -295,7 +298,8 @@ static int cpu_request_microcode(int cpu
>      {
>          printk(KERN_ERR "microcode: error! "
>                 "Can not allocate memory for microcode patch\n");
> -        return -ENOMEM;
> +        error = -ENOMEM;
> +        goto out;
>      }
>  
>      error = install_equiv_cpu_table(mc_amd, buf, &offset);
> @@ -303,7 +307,8 @@ static int cpu_request_microcode(int cpu
>      {
>          xfree(mc_amd);
>          printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
> -        return -EINVAL;
> +        error = -EINVAL;
> +        goto out;
>      }
>  
>      mc_old = uci->mc.mc_amd;
> @@ -337,13 +342,19 @@ static int cpu_request_microcode(int cpu
>      /* On success keep the microcode patch for
>       * re-apply on resume.
>       */
> -    if (error == 1)
> +    if ( error == 1 )
>      {
>          xfree(mc_old);
> -        return 0;
> +        error = 0;
> +    }
> +    else
> +    {
> +        xfree(mc_amd);
> +        uci->mc.mc_amd = mc_old;
>      }
> -    xfree(mc_amd);
> -    uci->mc.mc_amd = mc_old;
> +
> +  out:
> +    svm_host_osvw_init();
>  
>      return error;
>  }
> @@ -395,11 +406,28 @@ err1:
>      return -ENOMEM;
>  }
>  
> +static int start_update(void)
> +{
> +    /*
> +     * We assume here that svm_host_osvw_init() will be called on each cpu (from
> +     * cpu_request_microcode()).
> +     *
> +     * Note that if collect_cpu_info() returns an error then
> +     * cpu_request_microcode() will not be invoked, thus leaving OSVW bits not
> +     * updated. Currently though collect_cpu_info() will not fail on processors
> +     * supporting OSVW so we will not deal with this possibility.
> +     */
> +    svm_host_osvw_reset();
> +
> +    return 0;
> +}
> +
>  static const struct microcode_ops microcode_amd_ops = {
>      .microcode_resume_match           = microcode_resume_match,
>      .cpu_request_microcode            = cpu_request_microcode,
>      .collect_cpu_info                 = collect_cpu_info,
>      .apply_microcode                  = apply_microcode,
> +    .start_update                     = start_update,
>  };
>  
>  static __init int microcode_init_amd(void)
> --- a/xen/arch/x86/platform_hypercall.c
> +++ b/xen/arch/x86/platform_hypercall.c
> @@ -166,7 +166,21 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
>              break;
>  
>          guest_from_compat_handle(data, op->u.microcode.data);
> +
> +        /*
> +         * alloc_vpcu() will access data which is modified during
> +         * microcode update
> +         */
> +        while ( !spin_trylock(&vcpu_alloc_lock) )
> +            if ( hypercall_preempt_check() )
> +            {
> +                ret = hypercall_create_continuation(
> +                    __HYPERVISOR_platform_op, "h", u_xenpf_op);
> +                goto out;
> +            }
> +
>          ret = microcode_update(data, op->u.microcode.length);
> +        spin_unlock(&vcpu_alloc_lock);
>      }
>      break;
>  
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -29,6 +29,7 @@
>  #include <xsm/xsm.h>
>  
>  static DEFINE_SPINLOCK(domctl_lock);
> +DEFINE_SPINLOCK(vcpu_alloc_lock);
>  
>  int cpumask_to_xenctl_cpumap(
>      struct xenctl_cpumap *xenctl_cpumap, const cpumask_t *cpumask)
> @@ -506,6 +507,18 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
>          /* Needed, for example, to ensure writable p.t. state is synced. */
>          domain_pause(d);
>  
> +        /*
> +         * Certain operations (e.g. CPU microcode updates) modify data which is
> +         * used during VCPU allocation/initialization
> +         */
> +        while ( !spin_trylock(&vcpu_alloc_lock) )
> +            if ( hypercall_preempt_check() )
> +            {
> +                ret = hypercall_create_continuation(
> +                    __HYPERVISOR_domctl, "h", u_domctl);
> +                goto maxvcpu_out_novcpulock;
> +            }
> +
>          /* We cannot reduce maximum VCPUs. */
>          ret = -EINVAL;
>          if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
> @@ -555,6 +568,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
>          ret = 0;
>  
>      maxvcpu_out:
> +        spin_unlock(&vcpu_alloc_lock);
> +
> +    maxvcpu_out_novcpulock:
>          domain_unpause(d);
>          rcu_unlock_domain(d);
>      }
> --- a/xen/include/asm-x86/hvm/svm/svm.h
> +++ b/xen/include/asm-x86/hvm/svm/svm.h
> @@ -98,4 +98,7 @@ extern u32 svm_feature_flags;
>                                    ~TSC_RATIO_RSVD_BITS )
>  #define vcpu_tsc_ratio(v)       TSC_RATIO((v)->domain->arch.tsc_khz, cpu_khz)
>  
> +extern void svm_host_osvw_reset(void);
> +extern void svm_host_osvw_init(void);
> +
>  #endif /* __ASM_X86_HVM_SVM_H__ */
> --- a/xen/include/asm-x86/hvm/svm/vmcb.h
> +++ b/xen/include/asm-x86/hvm/svm/vmcb.h
> @@ -516,6 +516,12 @@ struct arch_svm_struct {
>      
>      /* AMD lightweight profiling MSR */
>      uint64_t guest_lwp_cfg;
> +
> +    /* OSVW MSRs */
> +    struct {
> +        u64 length;
> +        u64 status;
> +    } osvw;
>  };
>  
>  struct vmcb_struct *alloc_vmcb(void);
> --- a/xen/include/asm-x86/microcode.h
> +++ b/xen/include/asm-x86/microcode.h
> @@ -11,6 +11,7 @@ struct microcode_ops {
>      int (*cpu_request_microcode)(int cpu, const void *buf, size_t size);
>      int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
>      int (*apply_microcode)(int cpu);
> +    int (*start_update)(void);
>  };
>  
>  struct cpu_signature {
> --- a/xen/include/xen/domain.h
> +++ b/xen/include/xen/domain.h
> @@ -69,6 +69,7 @@ void arch_dump_domain_info(struct domain
>  
>  void arch_vcpu_reset(struct vcpu *v);
>  
> +extern spinlock_t vcpu_alloc_lock;
>  bool_t domctl_lock_acquire(void);
>  void domctl_lock_release(void);
>  
> 
> 
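
For anyone wondering what the guest side of this looks like: with OSVW
virtualized as above, a guest can test the OSVW bits instead of keying
the erratum 400 workaround off family/model alone. A minimal sketch
(hypothetical guest code, not the actual Linux implementation; it
assumes the standard OSVW MSR numbers and that erratum 400 is OSVW id 1,
which matches the patch clearing status bits 1 and 2 for errata 400 and
415):

    #include <stdint.h>

    #define MSR_AMD_OSVW_ID_LENGTH  0xc0010140
    #define MSR_AMD_OSVW_STATUS     0xc0010141
    #define OSVW_ID_ERRATUM_400     1

    /* Assumed accessor; any guest kernel has an equivalent. */
    extern uint64_t rdmsr64(uint32_t msr);

    static int need_erratum_400_workaround(void)
    {
        uint64_t len = rdmsr64(MSR_AMD_OSVW_ID_LENGTH);

        /* Status bit outside the valid length: be conservative
         * and assume the erratum is present. */
        if ( len <= OSVW_ID_ERRATUM_400 )
            return 1;

        return (rdmsr64(MSR_AMD_OSVW_STATUS) >> OSVW_ID_ERRATUM_400) & 1;
    }

Because the patch forces osvw.length to at least 3 and clears status
bits 1 and 2, such a guest concludes erratum 400 is fixed and skips the
MSR read on its idle path.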


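A worked example of the worst-case merging in svm_host_osvw_init(), with
hypothetical per-CPU values: during a microcode update, start_update()
first calls svm_host_osvw_reset(), which sets osvw_length = 64 and
osvw_status = 0; each CPU then folds in what it reports:

    /* CPU0: OSVW length 3, status 0x2 (erratum 400 present) */
    /* CPU1: OSVW length 3, status 0x4 (erratum 415 present) */
    /*
     * After both CPUs run svm_host_osvw_init():
     *   osvw_length = min(64, 3, 3)     = 3
     *   osvw_status = (0x2 | 0x4) & 0x7 = 0x6
     * i.e. both errata are assumed present on every CPU. (Guests
     * still see bits 1 and 2 clear, since svm_guest_osvw_init()
     * masks them off.)
     */

If one CPU reports a shorter length, osvw_length shrinks to it and
osvw_status is masked down to the bits that remain valid.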
