[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH Remus v5 2/2] libxc/restore: implement Remus checkpointed restore



On 14/05/15 11:06, Yang Hongyang wrote:
> With Remus, the restore flow should be:
> the first full migration stream -> { periodically restore stream }
>
> Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> CC: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
> CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
> CC: Wei Liu <wei.liu2@xxxxxxxxxx>

Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

> ---
>  tools/libxc/xc_sr_common.h  |  14 ++++++
>  tools/libxc/xc_sr_restore.c | 113 ++++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 117 insertions(+), 10 deletions(-)
>
> diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
> index f8121e7..3bf27f1 100644
> --- a/tools/libxc/xc_sr_common.h
> +++ b/tools/libxc/xc_sr_common.h
> @@ -208,6 +208,20 @@ struct xc_sr_context
>              /* Plain VM, or checkpoints over time. */
>              bool checkpointed;
>  
> +            /* Currently buffering records between a checkpoint */
> +            bool buffer_all_records;
> +
> +/*
> + * With Remus, we buffer the records sent by the primary at each checkpoint,
> + * so that if the primary fails, we can recover from the last
> + * checkpoint state.
> + * This should be enough because the primary only sends dirty pages at
> + * each checkpoint.
> + */
> +#define MAX_BUF_RECORDS 1024
> +            struct xc_sr_record *buffered_records;
> +            unsigned buffered_rec_num;
> +
>              /*
>               * Xenstore and Console parameters.
>               * INPUT:  evtchn & domid
> diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
> index 9ab5760..8468ffc 100644
> --- a/tools/libxc/xc_sr_restore.c
> +++ b/tools/libxc/xc_sr_restore.c
> @@ -468,11 +468,69 @@ static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
>      return rc;
>  }
>  
> +static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
> +static int handle_checkpoint(struct xc_sr_context *ctx)
> +{
> +    xc_interface *xch = ctx->xch;
> +    int rc = 0;
> +    unsigned i;
> +
> +    if ( !ctx->restore.checkpointed )
> +    {
> +        ERROR("Found checkpoint in non-checkpointed stream");
> +        rc = -1;
> +        goto err;
> +    }
> +
> +    if ( ctx->restore.buffer_all_records )
> +    {
> +        IPRINTF("All records buffered");
> +
> +        /*
> +         * We need to set buffer_all_records to false in
> +         * order to process records instead of buffer records.
> +         * buffer_all_records should be set back to true after
> +         * we successfully processed all records.
> +         */
> +        ctx->restore.buffer_all_records = false;
> +        for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
> +        {
> +            rc = process_record(ctx, &ctx->restore.buffered_records[i]);
> +            if ( rc )
> +                goto err;
> +        }
> +        ctx->restore.buffered_rec_num = 0;
> +        ctx->restore.buffer_all_records = true;
> +        IPRINTF("All records processed");
> +    }
> +    else
> +        ctx->restore.buffer_all_records = true;
> +
> + err:
> +    return rc;
> +}
> +
>  static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
>  {
>      xc_interface *xch = ctx->xch;
>      int rc = 0;
>  
> +    if ( ctx->restore.buffer_all_records &&
> +         rec->type != REC_TYPE_END &&
> +         rec->type != REC_TYPE_CHECKPOINT )
> +    {
> +        if ( ctx->restore.buffered_rec_num >= MAX_BUF_RECORDS )
> +        {
> +            ERROR("There are too many records within a checkpoint");
> +            return -1;
> +        }
> +
> +        memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
> +               rec, sizeof(*rec));
> +
> +        return 0;
> +    }
> +
>      switch ( rec->type )
>      {
>      case REC_TYPE_END:
> @@ -487,12 +545,17 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
>          ctx->restore.verify = true;
>          break;
>  
> +    case REC_TYPE_CHECKPOINT:
> +        rc = handle_checkpoint(ctx);
> +        break;
> +
>      default:
>          rc = ctx->restore.ops.process_record(ctx, rec);
>          break;
>      }
>  
>      free(rec->data);
> +    rec->data = NULL;
>  
>      if ( rc == RECORD_NOT_PROCESSED )
>      {
> @@ -529,6 +592,15 @@ static int setup(struct xc_sr_context *ctx)
>          goto err;
>      }
>  
> +    ctx->restore.buffered_records = malloc(
> +        MAX_BUF_RECORDS * sizeof(struct xc_sr_record));
> +    if ( !ctx->restore.buffered_records )
> +    {
> +        ERROR("Unable to allocate memory for buffered records");
> +        rc = -1;
> +        goto err;
> +    }
> +
>   err:
>      return rc;
>  }
> @@ -536,7 +608,12 @@ static int setup(struct xc_sr_context *ctx)
>  static void cleanup(struct xc_sr_context *ctx)
>  {
>      xc_interface *xch = ctx->xch;
> +    unsigned i;
> +
> +    for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
> +        free(ctx->restore.buffered_records[i].data);
>  
> +    free(ctx->restore.buffered_records);
>      free(ctx->restore.populated_pfns);
>      if ( ctx->restore.ops.cleanup(ctx) )
>          PERROR("Failed to clean up");
> @@ -564,7 +641,27 @@ static int restore(struct xc_sr_context *ctx)
>      {
>          rc = read_record(ctx, &rec);
>          if ( rc )
> -            goto err;
> +        {
> +            if ( ctx->restore.buffer_all_records )
> +                goto remus_failover;
> +            else
> +                goto err;
> +        }
> +
> +#ifdef XG_LIBXL_HVM_COMPAT
> +        if ( ctx->dominfo.hvm &&
> +             (rec.type == REC_TYPE_END || rec.type == REC_TYPE_CHECKPOINT) )
> +        {
> +            rc = read_qemu(ctx);
> +            if ( rc )
> +            {
> +                if ( ctx->restore.buffer_all_records )
> +                    goto remus_failover;
> +                else
> +                    goto err;
> +            }
> +        }
> +#endif
>  
>          rc = process_record(ctx, &rec);
>          if ( rc )
> @@ -572,15 +669,11 @@ static int restore(struct xc_sr_context *ctx)
>  
>      } while ( rec.type != REC_TYPE_END );
>  
> -#ifdef XG_LIBXL_HVM_COMPAT
> -    if ( ctx->dominfo.hvm )
> -    {
> -        rc = read_qemu(ctx);
> -        if ( rc )
> -            goto err;
> -    }
> -#endif
> -
> + remus_failover:
> +    /*
> +     * With Remus, if we reach here, there must be some error on primary,
> +     * failover from the last checkpoint state.
> +     */
>      rc = ctx->restore.ops.stream_complete(ctx);
>      if ( rc )
>          goto err;


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.