|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v7 COLO 05/18] primary vm suspend/resume/checkpoint code
From: Wen Congyang <wency@xxxxxxxxxxxxxx>
We will do the following things again and again:
1. Suspend primary vm
a. Suspend primary vm
b. Do postsuspend
c. Read LIBXL_COLO_SVM_SUSPENDED sent by secondary
2. Resume primary vm
a. Read LIBXL_COLO_SVM_READY from secondary
b. Do preresume
c. Resume primary vm
d. Read LIBXL_COLO_SVM_RESUMED from secondary
3. Wait for a new checkpoint
a. Wait for a new checkpoint (not implemented)
b. Send LIBXL_COLO_NEW_CHECKPOINT to secondary
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
---
tools/libxl/Makefile | 2 +-
tools/libxl/libxl.c | 6 +-
tools/libxl/libxl_colo.h | 21 +-
tools/libxl/libxl_colo_save.c | 565 ++++++++++++++++++++++++++++++++++++++++++
tools/libxl/libxl_dom_save.c | 13 +-
tools/libxl/libxl_internal.h | 121 +++++----
tools/libxl/libxl_types.idl | 1 +
7 files changed, 662 insertions(+), 67 deletions(-)
create mode 100644 tools/libxl/libxl_colo_save.c
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 66ae63d..252c4e9 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -57,7 +57,7 @@ LIBXL_OBJS-y += libxl_nonetbuffer.o
endif
LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
-LIBXL_OBJS-y += libxl_colo_restore.o
+LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index f851957..8b866f4 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -17,6 +17,7 @@
#include "libxl_osdeps.h"
#include "libxl_internal.h"
+#include "libxl_colo.h"
#define PAGE_TO_MEMKB(pages) ((pages) * 4)
#define BACKEND_STRING_SIZE 5
@@ -842,7 +843,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx,
libxl_domain_remus_info *info,
assert(info);
/* Point of no return */
- libxl__remus_setup(egc, &dss->rs);
+ if (libxl_defbool_val(info->colo))
+ libxl__colo_save_setup(egc, &dss->css);
+ else
+ libxl__remus_setup(egc, &dss->rs);
return AO_INPROGRESS;
out:
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 91df275..49a430b 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -16,17 +16,6 @@
#ifndef LIBXL_COLO_H
#define LIBXL_COLO_H
-/*
- * values to control suspend/resume primary vm and secondary vm
- * at the same time
- */
-enum {
- LIBXL_COLO_NEW_CHECKPOINT = 1,
- LIBXL_COLO_SVM_SUSPENDED,
- LIBXL_COLO_SVM_READY,
- LIBXL_COLO_SVM_RESUMED,
-};
-
extern void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void,
int ret, int retval, int errnoval);
extern void libxl__colo_restore_setup(libxl__egc *egc,
@@ -35,4 +24,14 @@ extern void libxl__colo_restore_teardown(libxl__egc *egc,
libxl__colo_restore_state *crs,
int rc);
+extern void libxl__colo_save_domain_suspend_callback(void *data);
+extern void libxl__colo_save_domain_checkpoint_callback(void *data);
+extern void libxl__colo_save_domain_resume_callback(void *data);
+extern void libxl__colo_save_domain_should_checkpoint_callback(void *data);
+extern void libxl__colo_save_setup(libxl__egc *egc,
+ libxl__colo_save_state *css);
+extern void libxl__colo_save_teardown(libxl__egc *egc,
+ libxl__colo_save_state *css,
+ int rc);
+
#endif
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
new file mode 100644
index 0000000..4e059cc
--- /dev/null
+++ b/tools/libxl/libxl_colo_save.c
@@ -0,0 +1,565 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@xxxxxxxxxxxxxx>
+ * Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+
+/*
+ * Checkpoint device ops table that libxl__colo_save_setup() installs in
+ * cds->ops.  It currently holds nothing but a NULL entry: disk/nic support
+ * is still marked TODO in libxl__colo_save_setup().
+ */
+static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+ NULL,
+};
+
+/* ================= helper functions ================= */
+/*
+ * Set up device subkind-specific state in the libxl ctx.
+ * Nothing needs initialising yet, so this always reports success (0).
+ */
+static int init_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+    STATE_AO_GC(cds->ao);
+
+    return 0;
+}
+
+/*
+ * Counterpart of init_device_subkind(): the place to release device
+ * subkind-specific state.  The body currently only evaluates STATE_AO_GC()
+ * and releases nothing.
+ */
+static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+ /* cleanup device subkind-specific state in the libxl ctx */
+ STATE_AO_GC(cds->ao);
+}
+
+/* ================= colo: setup save environment ================= */
+static void colo_save_setup_done(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+static void colo_save_setup_failed(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+
+/*
+ * Entry point for COLO on the primary side.
+ *
+ * Rejects non-HVM guests, records the send/receive fds taken from the
+ * enclosing domain save state, starts the stream reader on the receive fd
+ * and then kicks off checkpoint device setup; colo_save_setup_done() runs
+ * when that setup finishes.  Any failure here completes the ao with
+ * ERROR_FAIL.
+ */
+void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+    libxl__checkpoint_devices_state *const cds = &css->cds;
+
+    STATE_AO_GC(dss->ao);
+
+    if (dss->type != LIBXL_DOMAIN_TYPE_HVM) {
+        LOG(ERROR, "COLO only supports hvm now");
+        goto fail;
+    }
+
+    /* svm_running only becomes true once COLO_SVM_READY has been read. */
+    css->svm_running = false;
+    css->send_fd = dss->fd;
+    css->recv_fd = dss->recv_fd;
+
+    /* TODO: disk/nic support */
+    cds->ao = ao;
+    cds->domid = dss->domid;
+    cds->device_kind_flags = 0;
+    cds->ops = colo_ops;
+    cds->callback = colo_save_setup_done;
+
+    /* Start the reader for the stream arriving on the receive fd. */
+    css->srs.ao = ao;
+    css->srs.fd = css->recv_fd;
+    css->srs.back_channel = true;
+    libxl__stream_read_start(egc, &css->srs);
+
+    if (init_device_subkind(cds) != 0)
+        goto fail;
+
+    libxl__checkpoint_devices_setup(egc, cds);
+    return;
+
+fail:
+    libxl__ao_complete(egc, ao, ERROR_FAIL);
+}
+
+/*
+ * Completion callback for checkpoint device setup.  On success the actual
+ * domain save is started; on failure the devices are torn down again and
+ * colo_save_setup_failed() finishes the operation.
+ */
+static void colo_save_setup_done(libxl__egc *egc,
+                                 libxl__checkpoint_devices_state *cds,
+                                 int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "COLO: failed to setup device for guest with domid %u",
+            dss->domid);
+        cds->callback = colo_save_setup_failed;
+        libxl__checkpoint_devices_teardown(egc, cds);
+        return;
+    }
+
+    libxl__domain_save(egc, dss);
+}
+
+/*
+ * Runs once the devices have been torn down after a failed setup.
+ *
+ * rc is the result of the teardown, not of the setup: it is only logged.
+ * The operation itself has already failed, so the ao is always completed
+ * with ERROR_FAIL; completing it with rc would report success whenever the
+ * teardown happened to succeed.
+ */
+static void colo_save_setup_failed(libxl__egc *egc,
+                                   libxl__checkpoint_devices_state *cds,
+                                   int rc)
+{
+    STATE_AO_GC(cds->ao);
+
+    if (rc)
+        LOG(ERROR, "COLO: failed to teardown device after setup failed"
+            " for guest with domid %u, rc %d", cds->domid, rc);
+
+    cleanup_device_subkind(cds);
+    libxl__ao_complete(egc, ao, ERROR_FAIL);
+}
+
+
+/* ================= colo: teardown save environment ================= */
+static void colo_teardown_done(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+
+/*
+ * Called when the domain save has terminated: logs the rc it ended with and
+ * tears down the checkpoint devices; colo_teardown_done() is invoked when
+ * the teardown completes.
+ */
+void libxl__colo_save_teardown(libxl__egc *egc,
+                               libxl__colo_save_state *css,
+                               int rc)
+{
+    libxl__checkpoint_devices_state *const cds = &css->cds;
+
+    STATE_AO_GC(cds->ao);
+
+    LOG(WARN, "COLO: Domain suspend terminated with rc %d,"
+        " teardown COLO devices...", rc);
+    cds->callback = colo_teardown_done;
+    libxl__checkpoint_devices_teardown(egc, cds);
+}
+
+/*
+ * Completion callback for the device teardown started by
+ * libxl__colo_save_teardown(): cleans up device subkind state and hands the
+ * teardown rc to the domain save state's completion callback.
+ */
+static void colo_teardown_done(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc)
+{
+ libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+ libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+ cleanup_device_subkind(cds);
+ dss->callback(egc, dss, rc);
+}
+
+/*
+ * checkpoint callbacks are called in the following order:
+ * 1. suspend
+ * 2. resume
+ * 3. checkpoint
+ */
+static void colo_common_write_stream_done(libxl__egc *egc,
+ libxl__stream_write_state *stream,
+ int rc);
+static void colo_common_read_stream_done(libxl__egc *egc,
+ libxl__stream_read_state *stream,
+ int rc);
+/* ===================== colo: suspend primary vm ===================== */
+
+static void colo_read_svm_suspended_done(libxl__egc *egc,
+ libxl__colo_save_state *css,
+ int id);
+/*
+ * Do the following things when suspending primary vm:
+ * 1. suspend primary vm
+ * 2. do postsuspend
+ * 3. read LIBXL_COLO_SVM_SUSPENDED
+ * 4. read secondary vm's dirty pages
+ */
+static void colo_suspend_primary_vm_done(libxl__egc *egc,
+ libxl__domain_suspend_state *dsps,
+ int ok);
+static void colo_postsuspend_cb(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+
+/*
+ * Save helper "suspend" callback for COLO: starts suspending the primary
+ * vm.  The outcome is delivered to colo_suspend_primary_vm_done().
+ */
+void libxl__colo_save_domain_suspend_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__domain_save_state *dss = CONTAINER_OF(shs, *dss, shs);
+
+    dss->dsps.callback_common_done = colo_suspend_primary_vm_done;
+    libxl__domain_suspend(shs->egc, &dss->dsps);
+}
+
+/*
+ * Called when the suspend of the primary vm has finished.  If it failed the
+ * failure is reported to the save helper; otherwise the checkpoint devices'
+ * postsuspend step is started and colo_postsuspend_cb() takes over.
+ */
+static void colo_suspend_primary_vm_done(libxl__egc *egc,
+                                         libxl__domain_suspend_state *dsps,
+                                         int ok)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(dsps, *dss, dsps);
+    libxl__checkpoint_devices_state *const cds = &dss->css.cds;
+
+    STATE_AO_GC(dsps->ao);
+
+    if (!ok) {
+        LOG(ERROR, "cannot suspend primary vm");
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+        return;
+    }
+
+    cds->callback = colo_postsuspend_cb;
+    libxl__checkpoint_devices_postsuspend(egc, cds);
+}
+
+/*
+ * Completion callback for the devices' postsuspend step.
+ *
+ * A failure is reported to the save helper as 0.  If the secondary vm is
+ * not running yet there is nothing to wait for and success (1) is reported
+ * immediately; otherwise a read of COLO_SVM_SUSPENDED is started and
+ * colo_read_svm_suspended_done() finishes the step.
+ */
+static void colo_postsuspend_cb(libxl__egc *egc,
+                                libxl__checkpoint_devices_state *cds,
+                                int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "postsuspend fails");
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+        return;
+    }
+
+    if (!css->svm_running) {
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 1);
+        return;
+    }
+
+    /* read COLO_SVM_SUSPENDED */
+    css->callback = colo_read_svm_suspended_done;
+    css->srs.read_records_callback = colo_common_read_stream_done;
+    libxl__stream_read_colo_context(egc, &css->srs);
+}
+
+/*
+ * Runs after a COLO context record has been read while suspending.
+ * id is the id of the record that was read; anything other than
+ * COLO_SVM_SUSPENDED is logged and reported to the save helper as failure
+ * (0), COLO_SVM_SUSPENDED is reported as success (1).
+ */
+static void colo_read_svm_suspended_done(libxl__egc *egc,
+                                         libxl__colo_save_state *css,
+                                         int id)
+{
+    int ok = 0;
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(css->cds.ao);
+
+    if (id != COLO_SVM_SUSPENDED) {
+        LOG(ERROR, "invalid section: %d, expected: %d", id,
+            COLO_SVM_SUSPENDED);
+        goto out;
+    }
+
+    ok = 1;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+
+/* ===================== colo: send tailbuf ========================== */
+/*
+ * Save helper "checkpoint" callback for COLO: starts writing the checkpoint
+ * records.  No follow-up step is queued (css->callback is NULL), so
+ * colo_common_write_stream_done() reports the result straight to the save
+ * helper.
+ */
+void libxl__colo_save_domain_checkpoint_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__domain_save_state *dss = CONTAINER_OF(shs, *dss, shs);
+
+    /* write toolstack and emulator context, checkpoint end */
+    dss->css.callback = NULL;
+    dss->sws.write_records_callback = colo_common_write_stream_done;
+    libxl__stream_write_start_checkpoint(shs->egc, &dss->sws);
+}
+
+/* ===================== colo: resume primary vm ===================== */
+/*
+ * Do the following things when resuming primary vm:
+ * 1. read LIBXL_COLO_SVM_READY
+ * 2. do preresume
+ * 3. resume primary vm
+ * 4. read LIBXL_COLO_SVM_RESUMED
+ */
+static void colo_preresume_dm_saved(libxl__egc *egc,
+ libxl__domain_save_state *dss, int rc);
+static void colo_read_svm_ready_done(libxl__egc *egc,
+ libxl__colo_save_state *css,
+ int id);
+static void colo_preresume_cb(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+static void colo_read_svm_resumed_done(libxl__egc *egc,
+ libxl__colo_save_state *css,
+ int id);
+
+/*
+ * Save helper "postcopy" callback for COLO: first step of resuming the
+ * primary vm.  For an HVM guest the device model is saved first; in both
+ * cases colo_preresume_dm_saved() continues the sequence.
+ */
+void libxl__colo_save_domain_resume_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__domain_save_state *dss = CONTAINER_OF(shs, *dss, shs);
+    libxl__egc *egc = shs->egc;
+
+    /* This would go into tailbuf. */
+    if (!dss->hvm) {
+        colo_preresume_dm_saved(egc, dss, 0);
+        return;
+    }
+
+    libxl__domain_save_device_model(egc, dss, colo_preresume_dm_saved);
+}
+
+/*
+ * Runs once the device model has been saved (or immediately for non-HVM).
+ * On failure the save helper is told 0; otherwise a read of COLO_SVM_READY
+ * is started and colo_read_svm_ready_done() continues.
+ */
+static void colo_preresume_dm_saved(libxl__egc *egc,
+                                    libxl__domain_save_state *dss, int rc)
+{
+    libxl__colo_save_state *const css = &dss->css;
+
+    STATE_AO_GC(css->cds.ao);
+
+    if (rc) {
+        LOG(ERROR, "Failed to save device model. Terminating COLO..");
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+        return;
+    }
+
+    /* read COLO_SVM_READY */
+    css->callback = colo_read_svm_ready_done;
+    css->srs.read_records_callback = colo_common_read_stream_done;
+    libxl__stream_read_colo_context(egc, &css->srs);
+}
+
+/*
+ * Runs after a COLO context record has been read while resuming.
+ * If the record is COLO_SVM_READY the secondary is marked as running and
+ * the devices' preresume step is started (continues in colo_preresume_cb());
+ * any other id is logged and reported to the save helper as failure (0).
+ */
+static void colo_read_svm_ready_done(libxl__egc *egc,
+                                     libxl__colo_save_state *css,
+                                     int id)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(css->cds.ao);
+
+    if (id == COLO_SVM_READY) {
+        css->svm_running = true;
+        css->cds.callback = colo_preresume_cb;
+        libxl__checkpoint_devices_preresume(egc, &css->cds);
+        return;
+    }
+
+    LOG(ERROR, "invalid section: %d, expected: %d", id, COLO_SVM_READY);
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+/*
+ * Completion callback for the devices' preresume step.  When preresume and
+ * the resume of the primary vm both succeed, a read of COLO_SVM_RESUMED is
+ * started and colo_read_svm_resumed_done() finishes the step; any failure
+ * is logged and reported to the save helper as 0.
+ */
+static void colo_preresume_cb(libxl__egc *egc,
+                              libxl__checkpoint_devices_state *cds,
+                              int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "preresume fails");
+    } else if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) {
+        /* The call above resumes the domain and the device model. */
+        LOG(ERROR, "cannot resume primary vm");
+    } else {
+        /* read COLO_SVM_RESUMED */
+        css->callback = colo_read_svm_resumed_done;
+        css->srs.read_records_callback = colo_common_read_stream_done;
+        libxl__stream_read_colo_context(egc, &css->srs);
+        return;
+    }
+
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+/*
+ * Runs after the COLO context record following the resume has been read.
+ * Reports success (1) to the save helper if the record is COLO_SVM_RESUMED,
+ * otherwise logs the unexpected id and reports failure (0).
+ */
+static void colo_read_svm_resumed_done(libxl__egc *egc,
+                                       libxl__colo_save_state *css,
+                                       int id)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+    const int ok = (id == COLO_SVM_RESUMED) ? 1 : 0;
+
+    STATE_AO_GC(css->cds.ao);
+
+    if (!ok)
+        LOG(ERROR, "invalid section: %d, expected: %d", id, COLO_SVM_RESUMED);
+
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
+
+
+/* ===================== colo: wait new checkpoint ===================== */
+/*
+ * Do the following things:
+ * 1. do commit
+ * 2. wait for a new checkpoint
+ * 3. write LIBXL_COLO_NEW_CHECKPOINT
+ */
+static void colo_device_commit_cb(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+static void colo_start_new_checkpoint(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+
+/*
+ * Save helper "should_checkpoint" callback for COLO: starts the devices'
+ * commit step; colo_device_commit_cb() continues once it has completed.
+ */
+void libxl__colo_save_domain_should_checkpoint_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__domain_save_state *dss = CONTAINER_OF(shs, *dss, shs);
+
+    dss->css.cds.callback = colo_device_commit_cb;
+    libxl__checkpoint_devices_commit(dss->shs.egc, &dss->css.cds);
+}
+
+/*
+ * Completion callback for the devices' commit step.  A failure is logged
+ * and reported to the save helper as 0; on success the next checkpoint is
+ * started right away, since waiting for one is not implemented yet.
+ */
+static void colo_device_commit_cb(libxl__egc *egc,
+                                  libxl__checkpoint_devices_state *cds,
+                                  int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "commit fails");
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+        return;
+    }
+
+    /* TODO: wait a new checkpoint */
+    colo_start_new_checkpoint(egc, cds, 0);
+}
+
+/*
+ * Announce a new checkpoint by writing a COLO_NEW_CHECKPOINT context
+ * record; colo_common_write_stream_done() reports the outcome to the save
+ * helper.  A non-zero rc skips the write and reports failure (0) to the
+ * save helper straight away.
+ */
+static void colo_start_new_checkpoint(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc)
+{
+ libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
+ libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+ /*
+  * NOTE(review): colo_context lives on this function's stack; this presumes
+  * libxl__stream_write_colo_context() is done with it before returning -
+  * confirm against the stream writer.
+  */
+ libxl_sr_colo_context colo_context = { .id = COLO_NEW_CHECKPOINT };
+
+ if (rc)
+ goto out;
+
+ /* write COLO_NEW_CHECKPOINT */
+ /* NULL callback: the write completion reports directly to the helper */
+ css->callback = NULL;
+ dss->sws.write_records_callback = colo_common_write_stream_done;
+ libxl__stream_write_colo_context(egc, &dss->sws, &colo_context);
+
+ return;
+
+out:
+ libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 0);
+}
+
+
+/* ===================== colo: common callback ===================== */
+/*
+ * Common completion callback for writes to the stream.
+ *
+ * A negative rc is reported to the save helper as 2.  Otherwise the step
+ * stored in css->callback is run with 0 as its id argument; if no step is
+ * pending, success (1) is reported to the save helper.
+ */
+static void colo_common_write_stream_done(libxl__egc *egc,
+                                          libxl__stream_write_state *stream,
+                                          int rc)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(stream, *dss, sws);
+    libxl__colo_save_state *const css = &dss->css;
+
+    STATE_AO_GC(stream->ao);
+
+    if (rc < 0) {
+        /* TODO: it may be a internal error, but we don't know */
+        LOG(ERROR, "sending data fails");
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 2);
+        return;
+    }
+
+    if (css->callback) {
+        css->callback(egc, css, 0);
+        return;
+    }
+
+    /* Everything is OK */
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, 1);
+}
+
+/*
+ * Common completion callback for reads from the stream.
+ *
+ * A negative rc is a read failure and is reported to the save helper as 2.
+ * Otherwise rc carries the id of the record that was read and is passed to
+ * the step stored in css->callback; if no step is pending, success (1) is
+ * reported to the save helper.
+ */
+static void colo_common_read_stream_done(libxl__egc *egc,
+                                         libxl__stream_read_state *stream,
+                                         int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(stream, *css, srs);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+    int ok;
+
+    STATE_AO_GC(stream->ao);
+
+    if (rc < 0) {
+        /* TODO: it may be a internal error, but we don't know */
+        /* This is the read path: do not reuse the writer's message. */
+        LOG(ERROR, "reading data fails");
+        ok = 2;
+        goto out;
+    }
+
+    if (!css->callback) {
+        /* Everything is OK */
+        ok = 1;
+        goto out;
+    }
+
+    /* rc contains the id */
+    css->callback(egc, css, rc);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, ok);
+}
diff --git a/tools/libxl/libxl_dom_save.c b/tools/libxl/libxl_dom_save.c
index 9a3d009..26839cb 100644
--- a/tools/libxl/libxl_dom_save.c
+++ b/tools/libxl/libxl_dom_save.c
@@ -16,6 +16,7 @@
#include "libxl_osdeps.h" /* must come before any other headers */
#include "libxl_internal.h"
+#include "libxl_colo.h"
struct libxl__physmap_info {
uint64_t phys_offset;
@@ -437,6 +438,11 @@ void libxl__domain_save(libxl__egc *egc,
libxl__domain_save_state *dss)
callbacks->suspend = libxl__remus_domain_suspend_callback;
callbacks->postcopy = libxl__remus_domain_resume_callback;
callbacks->checkpoint = libxl__remus_domain_save_checkpoint_callback;
+ } else if (dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) {
+ callbacks->suspend = libxl__colo_save_domain_suspend_callback;
+ callbacks->postcopy = libxl__colo_save_domain_resume_callback;
+ callbacks->checkpoint = libxl__colo_save_domain_checkpoint_callback;
+ callbacks->should_checkpoint =
libxl__colo_save_domain_should_checkpoint_callback;
} else
callbacks->suspend = libxl__domain_suspend_callback;
@@ -575,12 +581,15 @@ static void domain_save_done(libxl__egc *egc,
}
/*
- * With Remus, if we reach this point, it means either
+ * With Remus/COLO, if we reach this point, it means either
* backup died or some network error occurred preventing us
* from sending checkpoints. Teardown the network buffers and
* release netlink resources. This is an async op.
*/
- libxl__remus_teardown(egc, &dss->rs, rc);
+ if (libxl_defbool_val(dss->remus->colo))
+ libxl__colo_save_teardown(egc, &dss->css, rc);
+ else
+ libxl__remus_teardown(egc, &dss->rs, rc);
}
/*========================= Domain restore ============================*/
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 0aafd59..bb5e298 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2655,7 +2655,7 @@ typedef struct libxl__save_helper_state {
/*
* The abstract checkpoint device layer exposes a common
* set of API to [external] libxl for manipulating devices attached to
- * a guest protected by Remus. The device layer also exposes a set of
+ * a guest protected by Remus/COLO. The device layer also exposes a set of
* [internal] interfaces that every device type must implement.
*
* The following API are exposed to libxl:
@@ -2673,7 +2673,7 @@ typedef struct libxl__save_helper_state {
* +libxl__checkpoint_devices_commit
*
* Each device type needs to implement the interfaces specified in
- * the libxl__checkpoint_device_instance_ops if it wishes to support Remus.
+ * the libxl__checkpoint_device_instance_ops if it wishes to support Remus/COLO.
*
* The high-level control flow through the checkpoint device layer is shown
* below:
@@ -2693,7 +2693,7 @@ typedef struct libxl__checkpoint_device_instance_ops
libxl__checkpoint_device_in
/*
* Interfaces to be implemented by every device subkind that wishes to
- * support Remus. Functions must be implemented unless otherwise
+ * support Remus/COLO. Functions must be implemented unless otherwise
* stated. Many of these functions are asynchronous. They call
* dev->aodev.callback when done. The actual implementations may be
* synchronous and call dev->aodev.callback directly (as the last
@@ -2873,6 +2873,66 @@ static inline bool libxl__convert_legacy_stream_inuse(
return libxl__ev_child_inuse(&chs->child);
}
+/* State for manipulating a libxl migration v2 stream */
+typedef struct libxl__stream_read_state libxl__stream_read_state;
+
+struct libxl__stream_read_state {
+ /* filled by the user */
+ libxl__ao *ao;
+ /* file descriptor the stream is read from */
+ int fd;
+ bool legacy;
+ /* libxl__colo_save_setup() sets this for the reader on its recv_fd */
+ bool back_channel;
+ void (*completion_callback)(libxl__egc *egc,
+ libxl__stream_read_state *stream,
+ int rc);
+ /*
+  * The COLO save code sets this before every
+  * libxl__stream_read_colo_context() call; there a negative rc means
+  * failure and any other rc is taken as the id of the record read.
+  */
+ void (*read_records_callback)(libxl__egc *egc,
+ libxl__stream_read_state *stream,
+ int rc);
+ /* Private */
+ libxl__carefd *v2_carefd;
+ int rc;
+ int joined_rc;
+ /* value returned by libxl__stream_read_inuse() */
+ bool running;
+ bool in_checkpoint;
+ bool in_colo_context;
+ libxl__datacopier_state dc;
+ size_t expected_len;
+ libxl_sr_hdr hdr;
+ libxl_sr_rec_hdr rec_hdr;
+ void *rec_body;
+};
+
+_hidden void libxl__stream_read_start(libxl__egc *egc,
+ libxl__stream_read_state *stream);
+
+_hidden void libxl__stream_read_continue(libxl__egc *egc,
+ libxl__stream_read_state *stream);
+_hidden void libxl__stream_read_start_checkpoint(
+ libxl__egc *egc, libxl__stream_read_state *stream);
+_hidden void libxl__stream_read_colo_context(
+ libxl__egc *egc, libxl__stream_read_state *stream);
+
+/* NOTE(review): defined outside this patch; presumably stops the reader with rc - confirm. */
+_hidden void libxl__stream_read_abort(libxl__egc *egc,
+                                      libxl__stream_read_state *stream, int rc);
+
+/* Returns the stream's private 'running' flag. */
+static inline bool libxl__stream_read_inuse(
+ const libxl__stream_read_state *stream)
+{
+ return stream->running;
+}
+
+/*----- colo related state structure -----*/
+typedef struct libxl__colo_save_state libxl__colo_save_state;
+/*
+ * Primary-side COLO state; embedded in libxl__domain_save_state as 'css'.
+ */
+struct libxl__colo_save_state {
+ libxl__checkpoint_devices_state cds;
+ /* copied from dss->fd and dss->recv_fd by libxl__colo_save_setup() */
+ int send_fd;
+ int recv_fd;
+
+ /* private */
+ /* reader for the stream arriving on recv_fd */
+ libxl__stream_read_state srs;
+ /*
+  * Step to run when the pending stream read or write completes.  The int
+  * is the id of the record that was read (0 after a write).  When NULL,
+  * the completion is reported straight to the save helper.
+  */
+ void (*callback)(libxl__egc *, libxl__colo_save_state *, int);
+ /* false after setup; set to true once COLO_SVM_READY has been read */
+ bool svm_running;
+};
/*----- Domain suspend (save) state structure -----*/
@@ -2978,7 +3038,12 @@ struct libxl__domain_save_state {
libxl__domain_suspend_state dsps;
int hvm;
int xcflags;
- libxl__remus_state rs;
+ union {
+ /* for Remus */
+ libxl__remus_state rs;
+ /* for COLO */
+ libxl__colo_save_state css;
+ };
libxl__save_helper_state shs;
libxl__logdirty_switch logdirty;
/* private for libxl__domain_save_device_model */
@@ -3232,54 +3297,6 @@ typedef void libxl__domain_create_cb(libxl__egc *egc,
libxl__domain_create_state*,
int rc, uint32_t domid);
-/* State for manipulating a libxl migration v2 stream */
-typedef struct libxl__stream_read_state libxl__stream_read_state;
-
-struct libxl__stream_read_state {
- /* filled by the user */
- libxl__ao *ao;
- int fd;
- bool legacy;
- bool back_channel;
- void (*completion_callback)(libxl__egc *egc,
- libxl__stream_read_state *stream,
- int rc);
- void (*read_records_callback)(libxl__egc *egc,
- libxl__stream_read_state *stream,
- int rc);
- /* Private */
- libxl__carefd *v2_carefd;
- int rc;
- int joined_rc;
- bool running;
- bool in_checkpoint;
- bool in_colo_context;
- libxl__datacopier_state dc;
- size_t expected_len;
- libxl_sr_hdr hdr;
- libxl_sr_rec_hdr rec_hdr;
- void *rec_body;
-};
-
-_hidden void libxl__stream_read_start(libxl__egc *egc,
- libxl__stream_read_state *stream);
-
-_hidden void libxl__stream_read_continue(libxl__egc *egc,
- libxl__stream_read_state *stream);
-_hidden void libxl__stream_read_start_checkpoint(
- libxl__egc *egc, libxl__stream_read_state *stream);
-_hidden void libxl__stream_read_colo_context(
- libxl__egc *egc, libxl__stream_read_state *stream);
-
-_hidden void libxl__stream_read_abort(libxl__egc *egc,
- libxl__stream_read_state *stream, int
rc);
-
-static inline bool libxl__stream_read_inuse(
- const libxl__stream_read_state *stream)
-{
- return stream->running;
-}
-
/* colo related structure */
typedef struct libxl__colo_restore_state libxl__colo_restore_state;
typedef void libxl__colo_callback(libxl__egc *,
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index e05d12b..cf1eeb2 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -696,6 +696,7 @@ libxl_domain_remus_info = Struct("domain_remus_info",[
("netbuf", libxl_defbool),
("netbufscript", string),
("diskbuf", libxl_defbool),
+ ("colo", libxl_defbool)
])
libxl_event_type = Enumeration("event_type", [
--
1.9.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |