[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH 3/3] remus: adjust x86 pv restore to support remus



On 09/07/14 08:47, Yang Hongyang wrote:
> cache vcpu context when restore, and set context when stream
> complete.

Can you explain why this is needed?  I can't see why it should be required.

>
> Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
> ---
>  tools/libxc/saverestore/common.h         |  6 +++
>  tools/libxc/saverestore/restore_x86_pv.c | 67 
> ++++++++++++++++++++++----------
>  2 files changed, 53 insertions(+), 20 deletions(-)
>
> diff --git a/tools/libxc/saverestore/common.h 
> b/tools/libxc/saverestore/common.h
> index 1dd9f51..ba54f83 100644
> --- a/tools/libxc/saverestore/common.h
> +++ b/tools/libxc/saverestore/common.h
> @@ -235,6 +235,12 @@ struct xc_sr_context
>  
>              /* Read-only mapping of guests shared info page */
>              shared_info_any_t *shinfo;
> +
> +            /*
> +             * We need to cache vcpu context when doing checkpointed
> +             * restore, otherwise restore will fail
> +             */
> +            vcpu_guest_context_any_t *vcpu;

"vcpus" seems more appropriate.

>          } x86_pv;
>      };
>  };
> diff --git a/tools/libxc/saverestore/restore_x86_pv.c 
> b/tools/libxc/saverestore/restore_x86_pv.c
> index 14fc020..7a54047 100644
> --- a/tools/libxc/saverestore/restore_x86_pv.c
> +++ b/tools/libxc/saverestore/restore_x86_pv.c
> @@ -428,8 +428,8 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context 
> *ctx, struct xc_sr_reco
>  {
>      xc_interface *xch = ctx->xch;
>      struct xc_sr_rec_x86_pv_vcpu_hdr *vhdr = rec->data;
> -    vcpu_guest_context_any_t vcpu;
> -    size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu.x64) : 
> sizeof(vcpu.x32);
> +    vcpu_guest_context_any_t *vcpu;
> +    size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu->x64) : 
> sizeof(vcpu->x32);

Constructs like this are used in several places.  A

static inline size_t pv_vcpu_size(const struct xc_sr_context *ctx)

helper in common_x86.h would be a neat improvement.

>      xen_pfn_t pfn, mfn;
>      unsigned long tmp;
>      unsigned i;
> @@ -455,22 +455,23 @@ static int handle_x86_pv_vcpu_basic(struct 
> xc_sr_context *ctx, struct xc_sr_reco
>          goto err;
>      }
>  
> -    memcpy(&vcpu, &vhdr->context, vcpusz);
> +    vcpu = (void *)ctx->x86_pv.vcpu + vcpusz * vhdr->vcpu_id;

vhdr->vcpu_id is essentially untrusted input at this point.  It needs to
be validated against dominfo.max_vcpu_id if it is to be used like this.

It was safe before as the first place it was used was with the set
hypercall which would validate in Xen.

> +    memcpy(vcpu, &vhdr->context, vcpusz);

If you are caching the context, the rest of this function can also be
deferred until the end, so it is run once rather than N times.

>  
>      /* Vcpu 0 is special: Convert the suspend record to an mfn. */
>      if ( vhdr->vcpu_id == 0 )
>      {
> -        rc = process_start_info(ctx, &vcpu);
> +        rc = process_start_info(ctx, vcpu);
>          if ( rc )
>              return rc;
>          rc = -1;
>      }
>  
> -    SET_FIELD(&vcpu, flags,
> -              GET_FIELD(&vcpu, flags, ctx->x86_pv.width) | VGCF_online,
> +    SET_FIELD(vcpu, flags,
> +              GET_FIELD(vcpu, flags, ctx->x86_pv.width) | VGCF_online,
>                ctx->x86_pv.width);
>  
> -    tmp = GET_FIELD(&vcpu, gdt_ents, ctx->x86_pv.width);
> +    tmp = GET_FIELD(vcpu, gdt_ents, ctx->x86_pv.width);
>      if ( tmp > 8192 )
>      {
>          ERROR("GDT entry count (%lu) out of range", tmp);
> @@ -481,7 +482,7 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context 
> *ctx, struct xc_sr_reco
>      /* Convert GDT frames to mfns. */
>      for ( i = 0; (i * 512) < tmp; ++i )
>      {
> -        pfn = GET_FIELD(&vcpu, gdt_frames[i], ctx->x86_pv.width);
> +        pfn = GET_FIELD(vcpu, gdt_frames[i], ctx->x86_pv.width);
>          if ( pfn > ctx->x86_pv.max_pfn )
>          {
>              ERROR("GDT frame %u (pfn %#lx) out of range", i, pfn);
> @@ -502,11 +503,11 @@ static int handle_x86_pv_vcpu_basic(struct 
> xc_sr_context *ctx, struct xc_sr_reco
>              goto err;
>          }
>  
> -        SET_FIELD(&vcpu, gdt_frames[i], mfn, ctx->x86_pv.width);
> +        SET_FIELD(vcpu, gdt_frames[i], mfn, ctx->x86_pv.width);
>      }
>  
>      /* Convert CR3 to an mfn. */
> -    pfn = cr3_to_mfn(ctx, GET_FIELD(&vcpu, ctrlreg[3], ctx->x86_pv.width));
> +    pfn = cr3_to_mfn(ctx, GET_FIELD(vcpu, ctrlreg[3], ctx->x86_pv.width));
>      if ( pfn > ctx->x86_pv.max_pfn )
>      {
>          ERROR("cr3 (pfn %#lx) out of range", pfn);
> @@ -529,12 +530,12 @@ static int handle_x86_pv_vcpu_basic(struct 
> xc_sr_context *ctx, struct xc_sr_reco
>          goto err;
>      }
>  
> -    SET_FIELD(&vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86_pv.width);
> +    SET_FIELD(vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86_pv.width);
>  
>      /* 64bit guests: Convert CR1 (guest pagetables) to mfn. */
> -    if ( ctx->x86_pv.levels == 4 && (vcpu.x64.ctrlreg[1] & 1) )
> +    if ( ctx->x86_pv.levels == 4 && (vcpu->x64.ctrlreg[1] & 1) )
>      {
> -        pfn = vcpu.x64.ctrlreg[1] >> PAGE_SHIFT;
> +        pfn = vcpu->x64.ctrlreg[1] >> PAGE_SHIFT;
>  
>          if ( pfn > ctx->x86_pv.max_pfn )
>          {
> @@ -558,13 +559,7 @@ static int handle_x86_pv_vcpu_basic(struct xc_sr_context 
> *ctx, struct xc_sr_reco
>              goto err;
>          }
>  
> -        vcpu.x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
> -    }
> -
> -    if ( xc_vcpu_setcontext(xch, ctx->domid, vhdr->vcpu_id, &vcpu) )
> -    {
> -        PERROR("Failed to set vcpu%u's basic info", vhdr->vcpu_id);
> -        goto err;
> +        vcpu->x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
>      }
>  
>      rc = 0;
> @@ -881,6 +876,7 @@ static int x86_pv_localise_page(struct xc_sr_context 
> *ctx, uint32_t type, void *
>  static int x86_pv_setup(struct xc_sr_context *ctx)
>  {
>      xc_interface *xch = ctx->xch;
> +    size_t vcpusz;
>      int rc;
>  
>      if ( ctx->restore.guest_type != DHDR_TYPE_X86_PV )
> @@ -904,6 +900,9 @@ static int x86_pv_setup(struct xc_sr_context *ctx)
>      if ( rc )
>          return rc;
>  
> +    vcpusz = ctx->x86_pv.width == 8 ?
> +             sizeof(ctx->x86_pv.vcpu->x64) : sizeof(ctx->x86_pv.vcpu->x32);
> +    ctx->x86_pv.vcpu = calloc((ctx->dominfo.max_vcpu_id + 1) , vcpusz);

Extraneous brackets and space.

>      return rc;
>  }
>  
> @@ -946,6 +945,29 @@ static int x86_pv_process_record(struct xc_sr_context 
> *ctx, struct xc_sr_record
>      }
>  }
>  
> +static int update_vcpu_context(struct xc_sr_context *ctx)
> +{
> +    xc_interface *xch = ctx->xch;
> +    vcpu_guest_context_any_t *vcpu;
> +    uint32_t vcpu_id;
> +    size_t vcpusz = ctx->x86_pv.width == 8 ? sizeof(vcpu->x64) : 
> sizeof(vcpu->x32);
> +
> +    for ( vcpu_id = 0; vcpu_id <= ctx->dominfo.max_vcpu_id; vcpu_id++ )
> +    {
> +        vcpu = (void *)ctx->x86_pv.vcpu + vcpu_id * vcpusz;
> +        if ( !vcpu )
> +            continue;

This check can never fail.

> +
> +        if ( xc_vcpu_setcontext(xch, ctx->domid, vcpu_id, vcpu) )

There is a latent bug introduced here.  If X86_PV_VCPU_BASIC records
have not been seen for all vcpus, this code will attempt to set a
context of a block of zeroes, and fail because of a bad cr3.

~Andrew

> +        {
> +            PERROR("Failed to set vcpu%u's basic info", vcpu_id);
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  /*
>   * restore_ops function.  Pin the pagetables, rewrite the p2m and seed the
>   * grant table.
> @@ -963,6 +985,10 @@ static int x86_pv_stream_complete(struct xc_sr_context 
> *ctx)
>      if ( rc )
>          return rc;
>  
> +    rc = update_vcpu_context(ctx);
> +    if ( rc )
> +        return rc;
> +
>      rc = xc_dom_gnttab_seed(xch, ctx->domid,
>                              ctx->restore.console_mfn,
>                              ctx->restore.xenstore_mfn,
> @@ -985,6 +1011,7 @@ static int x86_pv_cleanup(struct xc_sr_context *ctx)
>      free(ctx->x86_pv.p2m);
>      free(ctx->x86_pv.p2m_pfns);
>      free(ctx->x86_pv.pfn_types);
> +    free(ctx->x86_pv.vcpu);
>  
>      if ( ctx->x86_pv.m2p )
>          munmap(ctx->x86_pv.m2p, ctx->x86_pv.nr_m2p_frames * PAGE_SIZE);


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.