
Re: [Xen-devel] [PATCH v2] x86: generic MSRs save/restore



On 13/12/2013 14:01, Jan Beulich wrote:
> This patch introduces a generic MSRs save/restore mechanism, so that
> in the future new MSRs save/restore could be added w/ smaller change
> than the full blown addition of a new save/restore type.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -1127,10 +1127,117 @@ static int hvm_load_cpu_xsave_states(str
>      return 0;
>  }
>  
> -/* We need variable length data chunk for xsave area, hence customized
> - * declaration other than HVM_REGISTER_SAVE_RESTORE.
> +#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt])
> +static unsigned int __read_mostly msr_count_max;
> +
> +static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t *h)
> +{
> +    struct vcpu *v;
> +
> +    for_each_vcpu ( d, v )
> +    {
> +        struct hvm_msr *ctxt;
> +        unsigned int i;
> +
> +        if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id,
> +                             HVM_CPU_MSR_SIZE(msr_count_max)) )
> +            return 1;
> +        ctxt = (struct hvm_msr *)&h->data[h->cur];
> +        ctxt->count = 0;
> +
> +        if ( hvm_funcs.save_msr )
> +            hvm_funcs.save_msr(v, ctxt);
> +
> +        for ( i = 0; i < ctxt->count; ++i )
> +            ctxt->msr[i]._rsvd = 0;
> +
> +        if ( ctxt->count )
> +            h->cur += HVM_CPU_MSR_SIZE(ctxt->count);
> +        else
> +            h->cur -= sizeof(struct hvm_save_descriptor);

On the last iteration of the loop, this will leave a stale CPU_MSR_CODE
header in the area between the end of the hvm record and the maximum
possible size of the record, which then gets copied into the toolstack-
provided buffer.

Luckily, it does appear that xc_domain_save() deals with this correctly
and only sends the valid subset of the entire record.  I presume we
don't care about breaking any other toolstacks which might get this
wrong?
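
If we do care, one option (a sketch only, reusing the names from the
patch; the memset() is my addition, not something in the submission)
would be to clear the descriptor as it is rolled back:

    if ( ctxt->count )
        h->cur += HVM_CPU_MSR_SIZE(ctxt->count);
    else
    {
        /* Roll back and wipe the descriptor so no stale CPU_MSR_CODE
         * header is left behind in the buffer. */
        h->cur -= sizeof(struct hvm_save_descriptor);
        memset(&h->data[h->cur], 0, sizeof(struct hvm_save_descriptor));
    }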

> +    }
> +
> +    return 0;
> +}
> +
> +static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t *h)
> +{
> +    unsigned int i, vcpuid = hvm_load_instance(h);
> +    struct vcpu *v;
> +    const struct hvm_save_descriptor *desc;
> +    struct hvm_msr *ctxt;
> +    int err = 0;
> +
> +    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
> +    {
> +        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n",
> +                d->domain_id, vcpuid);
> +        return -EINVAL;
> +    }
> +
> +    /* Customized checking for entry since our entry is of variable length */
> +    desc = (struct hvm_save_descriptor *)&h->data[h->cur];
> +    if ( sizeof (*desc) > h->size - h->cur)
> +    {
> +        printk(XENLOG_G_WARNING
> +               "HVM%d.%d restore: not enough data left to read MSR descriptor\n",
> +               d->domain_id, vcpuid);
> +        return -ENODATA;
> +    }
> +    if ( desc->length + sizeof (*desc) > h->size - h->cur)
> +    {
> +        printk(XENLOG_G_WARNING
> +               "HVM%d.%d restore: not enough data left to read %u MSR bytes\n",
> +               d->domain_id, vcpuid, desc->length);
> +        return -ENODATA;
> +    }
> +    if ( desc->length < HVM_CPU_MSR_SIZE(1) )
> +    {
> +        printk(XENLOG_G_WARNING
> +               "HVM%d.%d restore mismatch: MSR length %u < %zu\n",
> +               d->domain_id, vcpuid, desc->length, HVM_CPU_MSR_SIZE(1));
> +        return -EINVAL;
> +    }
> +
> +    h->cur += sizeof(*desc);
> +    ctxt = (struct hvm_msr *)&h->data[h->cur];
> +    h->cur += desc->length;
> +
> +    if ( desc->length != HVM_CPU_MSR_SIZE(ctxt->count) )
> +    {
> +        printk(XENLOG_G_WARNING
> +               "HVM%d.%d restore mismatch: MSR length %u != %zu\n",
> +               d->domain_id, vcpuid, desc->length,
> +               HVM_CPU_MSR_SIZE(ctxt->count));
> +        return -EOPNOTSUPP;
> +    }
> +
> +    for ( i = 0; i < ctxt->count; ++i )
> +        if ( ctxt->msr[i]._rsvd )
> +            return -EOPNOTSUPP;
> +    /* Checking finished */
> +
> +    if ( hvm_funcs.load_msr )
> +        err = hvm_funcs.load_msr(v, ctxt);
> +
> +    for ( i = 0; !err && i < ctxt->count; ++i )
> +    {
> +        switch ( ctxt->msr[i].index )
> +        {
> +        default:
> +            if ( !ctxt->msr[i]._rsvd )
> +                err = -ENXIO;
> +            break;
> +        }
> +    }
> +
> +    return err;
> +}
> +
> +/* We need variable length data chunks for XSAVE area and MSRs, hence
> + * a custom declaration rather than HVM_REGISTER_SAVE_RESTORE.
>   */
> -static int __init __hvm_register_CPU_XSAVE_save_and_restore(void)
> +static int __init hvm_register_CPU_save_and_restore(void)
>  {
>      hvm_register_savevm(CPU_XSAVE_CODE,
>                          "CPU_XSAVE",
> @@ -1139,9 +1246,22 @@ static int __init __hvm_register_CPU_XSA
>                          HVM_CPU_XSAVE_SIZE(xfeature_mask) +
>                              sizeof(struct hvm_save_descriptor),
>                          HVMSR_PER_VCPU);
> +
> +    if ( hvm_funcs.init_msr )
> +        msr_count_max += hvm_funcs.init_msr();

Why += as opposed to direct assignment?  Changing this value anywhere
other than here looks as if it will lead to problems.
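
Something along these lines (just a sketch) would make the
assign-once intent explicit:

    if ( hvm_funcs.init_msr )
        msr_count_max = hvm_funcs.init_msr();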

> +
> +    if ( msr_count_max )
> +        hvm_register_savevm(CPU_MSR_CODE,
> +                            "CPU_MSR",
> +                            hvm_save_cpu_msrs,
> +                            hvm_load_cpu_msrs,
> +                            HVM_CPU_MSR_SIZE(msr_count_max) +
> +                                sizeof(struct hvm_save_descriptor),
> +                            HVMSR_PER_VCPU);
> +
>      return 0;
>  }
> -__initcall(__hvm_register_CPU_XSAVE_save_and_restore);
> +__initcall(hvm_register_CPU_save_and_restore);
>  
>  int hvm_vcpu_initialise(struct vcpu *v)
>  {
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -109,6 +109,10 @@ struct hvm_function_table {
>      void (*save_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt);
>      int (*load_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt);
>  
> +    unsigned int (*init_msr)(void);

Possibly a comment to explain that this must return the number of MSRs
we expect to insert into an hvm msr record?
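
For example (wording is only a suggestion of mine, not from the patch):

    /* Return the maximum number of MSRs the implementation may write
     * into a struct hvm_msr record; used to size CPU_MSR save records. */
    unsigned int (*init_msr)(void);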

> +    void (*save_msr)(struct vcpu *, struct hvm_msr *);
> +    int (*load_msr)(struct vcpu *, struct hvm_msr *);
> +
>      /* Examine specifics of the guest state. */
>      unsigned int (*get_interrupt_shadow)(struct vcpu *v);
>      void (*set_interrupt_shadow)(struct vcpu *v, unsigned int intr_shadow);
> --- a/xen/include/public/arch-x86/hvm/save.h
> +++ b/xen/include/public/arch-x86/hvm/save.h
> @@ -592,9 +592,21 @@ struct hvm_tsc_adjust {
>  
>  DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust);
>  
> +
> +struct hvm_msr {
> +    uint32_t count;
> +    struct hvm_one_msr {
> +        uint32_t index;
> +        uint32_t _rsvd;
> +        uint64_t val;
> +    } msr[1 /* variable size */];

Coverity is going to complain about indexing msr[] beyond its single
declared element, but I can't think of a better way of doing this
without introducing a VLA to the public header files.

As far as I can tell, the underlying implementation is safe to index off
the end of msr[].
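
For reference, all of the sizing in the patch goes through
HVM_CPU_MSR_SIZE(), i.e. offsetof(struct hvm_msr, msr[cnt]), so msr[]
behaves like a flexible array despite the [1] declaration.  E.g. (a
sketch, with a hypothetical count):

    unsigned int cnt = 4;                               /* hypothetical */
    size_t bytes = offsetof(struct hvm_msr, msr[cnt]);  /* == HVM_CPU_MSR_SIZE(cnt) */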

~Andrew

> +};
> +
> +#define CPU_MSR_CODE  20
> +
>  /* 
>   * Largest type-code in use
>   */
> -#define HVM_SAVE_CODE_MAX 19
> +#define HVM_SAVE_CODE_MAX 20
>  
>  #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
>
>


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel