[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH COLO v5 20/29] implement the cmdline for COLO
From: Wen Congyang <wency@xxxxxxxxxxxxxx> Add a new option -c to the command 'xl remus'. If you want to use COLO HA instead of Remus HA, use the -c option. Update man pages to reflect the addition of a new option to the 'xl remus' command. Also add a new option -c to the internal command 'xl migrate-receive'. Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx> --- docs/man/xl.pod.1 | 12 +++++++++-- tools/libxl/libxl.c | 16 ++++++++++++++ tools/libxl/xl_cmdimpl.c | 53 +++++++++++++++++++++++++++++++++++++++-------- tools/libxl/xl_cmdtable.c | 4 +++- 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1 index 16783c8..adcbe37 100644 --- a/docs/man/xl.pod.1 +++ b/docs/man/xl.pod.1 @@ -440,12 +440,15 @@ Print huge (!) amount of debug during the migration process. =item B<remus> [I<OPTIONS>] I<domain-id> I<host> -Enable Remus HA for domain. By default B<xl> relies on ssh as a transport -mechanism between the two hosts. +Enable Remus HA or COLO HA for domain. By default B<xl> relies on ssh as a +transport mechanism between the two hosts. N.B: Remus support in xl is still in experimental (proof-of-concept) phase. Disk replication support is limited to DRBD disks. + COLO support in xl is still in experimental (proof-of-concept) phase. + There is no support for network or disk at the moment. + B<OPTIONS> =over 4 @@ -491,6 +494,11 @@ Disable network output buffering. Requires enabling unsafe mode. Disable disk replication. Requires enabling unsafe mode. +=item B<-c> + +Enable COLO HA. It conflicts with B<-i> and B<-b>, and memory +checkpoint compression must be disabled. 
+ =back =item B<pause> I<domain-id> diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index b6c5429..afe0cc9 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -862,6 +862,22 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, goto out; } + /* The caller must set this defbool */ + if (libxl_defbool_is_default(info->colo)) { + LOG(ERROR, "colo mode must be enabled/disabled"); + rc = ERROR_FAIL; + goto out; + } + + if (libxl_defbool_val(info->colo)) { + libxl_defbool_setdefault(&info->compression, false); + if (libxl_defbool_val(info->compression)) { + LOG(ERROR, "cannot use memory checkpoint compression in COLO mode"); + rc = ERROR_FAIL; + goto out; + } + } + libxl_defbool_setdefault(&info->allow_unsafe, false); libxl_defbool_setdefault(&info->blackhole, false); libxl_defbool_setdefault(&info->compression, true); diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index 4574d05..6c5b792 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -4250,6 +4250,9 @@ static void migrate_receive(int debug, int daemonize, int monitor, dom_info.send_fd = send_fd; dom_info.migration_domname_r = &migration_domname; dom_info.checkpointed_stream = remus; + if (remus == LIBXL_CHECKPOINTED_STREAM_COLO) + /* COLO uses stdout to send control message to master */ + dom_info.quiet = 1; rc = create_domain(&dom_info); if (rc < 0) { @@ -4264,7 +4267,8 @@ static void migrate_receive(int debug, int daemonize, int monitor, /* If we are here, it means that the sender (primary) has crashed. * TODO: Split-Brain Check. */ - fprintf(stderr, "migration target: Remus Failover for domain %u\n", + fprintf(stderr, "migration target: %s Failover for domain %u\n", + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? 
"COLO" : "Remus", domid); /* @@ -4281,15 +4285,21 @@ static void migrate_receive(int debug, int daemonize, int monitor, rc = libxl_domain_rename(ctx, domid, migration_domname, common_domname); if (rc) - fprintf(stderr, "migration target (Remus): " + fprintf(stderr, "migration target (%s): " "Failed to rename domain from %s to %s:%d\n", + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus", migration_domname, common_domname, rc); } + if (remus == LIBXL_CHECKPOINTED_STREAM_COLO) + /* The guest is running after failover in COLO mode */ + exit(rc ? -ERROR_FAIL: 0); + rc = libxl_domain_unpause(ctx, domid); if (rc) - fprintf(stderr, "migration target (Remus): " + fprintf(stderr, "migration target (%s): " "Failed to unpause domain %s (id: %u):%d\n", + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus", common_domname, domid, rc); exit(rc ? -ERROR_FAIL: 0); @@ -4435,7 +4445,7 @@ int main_migrate_receive(int argc, char **argv) int debug = 0, daemonize = 1, monitor = 1, remus = 0; int opt; - SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) { + SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) { case 'F': daemonize = 0; break; @@ -4447,8 +4457,10 @@ int main_migrate_receive(int argc, char **argv) debug = 1; break; case 'r': - remus = 1; + remus = LIBXL_CHECKPOINTED_STREAM_REMUS; break; + case 'c': + remus = LIBXL_CHECKPOINTED_STREAM_COLO; } if (argc-optind != 0) { @@ -7892,15 +7904,18 @@ int main_remus(int argc, char **argv) pid_t child = -1; uint8_t *config_data; int config_len; + int interval = 0; memset(&r_info, 0, sizeof(libxl_domain_remus_info)); /* Defaults */ r_info.interval = 200; libxl_defbool_setdefault(&r_info.blackhole, false); + libxl_defbool_setdefault(&r_info.colo, false); - SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) { + SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) { case 'i': r_info.interval = atoi(optarg); + interval = 1; break; case 'F': libxl_defbool_set(&r_info.allow_unsafe, true); @@ 
-7926,11 +7941,28 @@ int main_remus(int argc, char **argv) case 'e': daemonize = 0; break; + case 'c': + libxl_defbool_set(&r_info.colo, true); } domid = find_domain(argv[optind]); host = argv[optind + 1]; + if (libxl_defbool_val(r_info.colo)) { + if (!interval) + r_info.interval = 0; + + if (r_info.interval || libxl_defbool_val(r_info.blackhole)) { + perror("option -c is conflict with -i or -b"); + exit(-1); + } + + if (libxl_defbool_is_default(r_info.compression)) { + perror("option -u must be specified when using COLO"); + exit(-1); + } + } + if (!r_info.netbufscript) r_info.netbufscript = default_remus_netbufscript; @@ -7945,8 +7977,9 @@ int main_remus(int argc, char **argv) if (!ssh_command[0]) { rune = host; } else { - if (asprintf(&rune, "exec %s %s xl migrate-receive -r %s", + if (asprintf(&rune, "exec %s %s xl migrate-receive %s %s", ssh_command, host, + libxl_defbool_val(r_info.colo) ? "-c" : "-r", daemonize ? "" : " -e") < 0) return 1; } @@ -7975,7 +8008,8 @@ int main_remus(int argc, char **argv) * domain to force failover */ if (libxl_domain_info(ctx, 0, domid)) { - fprintf(stderr, "Remus: Primary domain has been destroyed.\n"); + fprintf(stderr, "%s: Primary domain has been destroyed.\n", + libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); close(send_fd); return 0; } @@ -7987,7 +8021,8 @@ int main_remus(int argc, char **argv) if (rc == ERROR_GUEST_TIMEDOUT) fprintf(stderr, "Failed to suspend domain at primary.\n"); else { - fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n"); + fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n", + libxl_defbool_val(r_info.colo) ? 
"COLO" : "Remus"); libxl_domain_resume(ctx, domid, 1, 0); } diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c index 9284887..8ba256a 100644 --- a/tools/libxl/xl_cmdtable.c +++ b/tools/libxl/xl_cmdtable.c @@ -514,7 +514,9 @@ struct cmd_spec cmd_table[] = { "-b Replicate memory checkpoints to /dev/null (blackhole).\n" " Works only in unsafe mode.\n" "-n Disable network output buffering. Works only in unsafe mode.\n" - "-d Disable disk replication. Works only in unsafe mode." + "-d Disable disk replication. Works only in unsafe mode.\n" + "-c Enable COLO HA. It is conflict with -i and -b, and memory\n" + " checkpoint must be disabled" }, #endif { "devd", -- 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |