[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v6 COLO 04/15] libxc/restore: support COLO restore
call the callbacks resume/checkpoint/suspend while secondary vm status is consistent with primary. Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx> CC: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- tools/libxc/xc_sr_common.h | 11 +++++-- tools/libxc/xc_sr_restore.c | 63 ++++++++++++++++++++++++++++++++++++- tools/libxc/xc_sr_restore_x86_hvm.c | 1 + 3 files changed, 72 insertions(+), 3 deletions(-) diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h index 565c5da..382bf76 100644 --- a/tools/libxc/xc_sr_common.h +++ b/tools/libxc/xc_sr_common.h @@ -132,8 +132,11 @@ struct xc_sr_restore_ops * * @return 0 for success, -1 for failure, or the sentinel value * RECORD_NOT_PROCESSED. + * BROKEN_CHANNEL: if we are under Remus/COLO, this means master may dead, + * we will failover. */ #define RECORD_NOT_PROCESSED 1 +#define BROKEN_CHANNEL 2 int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec); /** @@ -205,8 +208,12 @@ struct xc_sr_context uint32_t guest_type; uint32_t guest_page_size; - /* Plain VM, or checkpoints over time. */ - bool checkpointed; + /* + * 0: Plain VM + * 1: Remus + * 2: COLO + */ + int checkpointed; /* Currently buffering records between a checkpoint */ bool buffer_all_records; diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c index 2d2edd3..982a70e 100644 --- a/tools/libxc/xc_sr_restore.c +++ b/tools/libxc/xc_sr_restore.c @@ -1,4 +1,5 @@ #include <arpa/inet.h> +#include <assert.h> #include "xc_sr_common.h" @@ -472,7 +473,7 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec); static int handle_checkpoint(struct xc_sr_context *ctx) { xc_interface *xch = ctx->xch; - int rc = 0; + int rc = 0, ret; unsigned i; if ( !ctx->restore.checkpointed ) @@ -498,6 +499,46 @@ static int handle_checkpoint(struct xc_sr_context *ctx) else ctx->restore.buffer_all_records = true; + if ( ctx->restore.checkpointed == 2 ) + { +#define HANDLE_CALLBACK_RETURN_VALUE(ret) \ + do { \ + if ( ret == 0 ) \ + { \ + /* Some internal error happens */ \ + rc = -1; \ + goto err; \ + } \ + else if ( ret == 2 ) \ + { \ + /* Reading/writing error, do failover */ \ + rc = BROKEN_CHANNEL; \ + goto err; \ + } \ + } while (0) + + /* COLO */ + + /* We need to resume guest */ + rc = ctx->restore.ops.stream_complete(ctx); + if ( rc ) + goto err; + + /* TODO: call restore_results */ + + /* Resume secondary vm */ + ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data); + HANDLE_CALLBACK_RETURN_VALUE(ret); + + /* wait for new checkpoint */ + ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data); + HANDLE_CALLBACK_RETURN_VALUE(ret); + + /* suspend secondary vm */ + ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data); + HANDLE_CALLBACK_RETURN_VALUE(ret); + } + err: return rc; } @@ -678,6 +719,8 @@ static int restore(struct xc_sr_context *ctx) goto err; } } + else if ( rc == BROKEN_CHANNEL ) + goto remus_failover; else if ( rc ) goto err; } @@ -685,6 +728,15 @@ static int restore(struct xc_sr_context *ctx) } while ( rec.type != REC_TYPE_END ); remus_failover: + + if ( ctx->restore.checkpointed == 2 ) + { + /* With COLO, we have already called stream_complete */ + rc = 0; + IPRINTF("COLO Failover"); + goto done; + } + /* * With Remus, if we reach here, there must be some error on primary, * failover from the last checkpoint state. @@ -735,6 +787,15 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom, ctx.restore.checkpointed = checkpointed_stream; ctx.restore.callbacks = callbacks; + /* Sanity checks for callbacks. */ + if ( ctx.restore.checkpointed == 2 ) + { + /* this is COLO restore */ + assert(callbacks->suspend && + callbacks->checkpoint && + callbacks->postcopy); + } + IPRINTF("In experimental %s", __func__); DPRINTF("fd %d, dom %u, hvm %u, pae %u, superpages %d" ", checkpointed_stream %d", io_fd, dom, hvm, pae, diff --git a/tools/libxc/xc_sr_restore_x86_hvm.c b/tools/libxc/xc_sr_restore_x86_hvm.c index 06177e0..8e54c68 100644 --- a/tools/libxc/xc_sr_restore_x86_hvm.c +++ b/tools/libxc/xc_sr_restore_x86_hvm.c @@ -181,6 +181,7 @@ static int handle_qemu(struct xc_sr_context *ctx) if ( fp ) fclose(fp); free(qbuf); + ctx->x86_hvm.restore.qbuf = NULL; return rc; } -- 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |