[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC PATCH COLO v5 15/29] secondary vm suspend/resume/checkpoint code



From: Wen Congyang <wency@xxxxxxxxxxxxxx>

Secondary vm is running in colo mode. So we will do
the following things again and again:
1. Resume secondary vm
   a. Send LIBXL_COLO_SVM_READY to master.
   b. If it is not resumed the first time, call
      libxl__checkpoint_devices_preresume().
   c. If it is resumed the first time, call libxl__xc_domain_restore_done()
      to build the secondary vm. We should also enable secondary vm's logdirty.
      Otherwise, call libxl__domain_resume() to resume secondary vm.
   d. If it is resumed the first time, call libxl__checkpoint_devices_setup()
      to setup checkpoint devices.
   e. Send LIBXL_COLO_SVM_RESUMED to master.
2. Wait for a new checkpoint
   a. Call libxl__checkpoint_devices_commit().
   b. Read LIBXL_COLO_NEW_CHECKPOINT from master.
3. Suspend secondary vm
   a. Suspend secondary vm.
   b. Call libxl__checkpoint_devices_postsuspend().
   c. Get secondary vm's dirty page information.
   d. Send LIBXL_COLO_SVM_SUSPENDED to master.
   e. Send secondary vm's dirty page information to master(count + pfn list).

Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
 tools/libxc/include/xenguest.h     |   20 +
 tools/libxl/Makefile               |    1 +
 tools/libxl/libxl_colo.h           |   38 ++
 tools/libxl/libxl_colo_restore.c   | 1158 ++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_create.c         |  116 +++-
 tools/libxl/libxl_dom.c            |    2 +-
 tools/libxl/libxl_internal.h       |   23 +
 tools/libxl/libxl_save_callout.c   |    6 +-
 tools/libxl/libxl_save_msgs_gen.pl |    6 +-
 9 files changed, 1363 insertions(+), 7 deletions(-)
 create mode 100644 tools/libxl/libxl_colo.h
 create mode 100644 tools/libxl/libxl_colo_restore.c

diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 601b108..6e621a6 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -93,6 +93,26 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
 
 /* callbacks provided by xc_domain_restore */
 struct restore_callbacks {
+    /* Called after a new checkpoint to suspend the guest.
+     */
+    int (*suspend)(void* data);
+
+    /* Called after the secondary vm is ready to resume.
+     * Callback function resumes the guest & the device model,
+     *  returns to xc_domain_restore.
+     */
+    int (*postcopy)(void* data);
+
+    /* callback to wait a new checkpoint
+     *
+     * returns:
+     * 0: terminate checkpointing gracefully
+     * 1: take another checkpoint */
+    int (*checkpoint)(void* data);
+
+    /* Enable qemu-dm logging dirty pages to xen */
+    int (*switch_qemu_logdirty)(int domid, unsigned enable, void *data); /* 
HVM only */
+
     /* callback to restore toolstack specific data */
     int (*toolstack_restore)(uint32_t domid, const uint8_t *buf,
             uint32_t size, void* data);
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 1e27754..8acfd5d 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -57,6 +57,7 @@ LIBXL_OBJS-y += libxl_nonetbuffer.o
 endif
 
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
+LIBXL_OBJS-y += libxl_colo_restore.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
new file mode 100644
index 0000000..91df275
--- /dev/null
+++ b/tools/libxl/libxl_colo.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#ifndef LIBXL_COLO_H
+#define LIBXL_COLO_H
+
+/*
+ * values to control suspend/resume primary vm and secondary vm
+ * at the same time
+ */
+enum {
+    LIBXL_COLO_NEW_CHECKPOINT = 1,
+    LIBXL_COLO_SVM_SUSPENDED,
+    LIBXL_COLO_SVM_READY,
+    LIBXL_COLO_SVM_RESUMED,
+};
+
+extern void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void,
+                                     int ret, int retval, int errnoval);
+extern void libxl__colo_restore_setup(libxl__egc *egc,
+                                      libxl__colo_restore_state *crs);
+extern void libxl__colo_restore_teardown(libxl__egc *egc,
+                                         libxl__colo_restore_state *crs,
+                                         int rc);
+
+#endif
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
new file mode 100644
index 0000000..7b825d4
--- /dev/null
+++ b/tools/libxl/libxl_colo_restore.c
@@ -0,0 +1,1158 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@xxxxxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+#include "libxl_bitops.h"
+
+#define XC_PAGE_SHIFT           12
+#define PAGE_SHIFT              XC_PAGE_SHIFT
+#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
+#define NRPAGES(x) (ROUNDUP(x, PAGE_SHIFT) >> PAGE_SHIFT)
+
+enum {
+    LIBXL_COLO_SETUPED,
+    LIBXL_COLO_SUSPENDED,
+    LIBXL_COLO_RESUMED,
+};
+
+/*
+ * Per-domain bookkeeping for the secondary vm's checkpoint loop
+ * (resume -> wait for checkpoint -> suspend).  Allocated in
+ * libxl__colo_restore_setup() and reachable through crs->crcs.
+ */
+typedef struct libxl__colo_restore_checkpoint_state libxl__colo_restore_checkpoint_state;
+struct libxl__colo_restore_checkpoint_state {
+    /* hypercall buffer descriptor copied in libxl__colo_restore_setup() */
+    xc_hypercall_buffer_t _dirty_bitmap;
+    /* points at _dirty_bitmap once allocated, NULL otherwise */
+    xc_hypercall_buffer_t *dirty_bitmap;
+    /* filled in by xc_domain_maximum_gpfn(); used to size the bitmap */
+    unsigned long p2m_size;
+    /* suspend state for the secondary vm, initialised by init_dss2() */
+    libxl__domain_suspend_state2 dss2;
+    /* datacopier reused for every message sent to / read from master */
+    libxl__datacopier_state dc;
+    /* one-byte read buffer for the section id coming from master */
+    uint8_t section;
+    /* state for switching log-dirty mode on and off */
+    libxl__logdirty_switch lds;
+    /* back pointer to the owning restore state */
+    libxl__colo_restore_state *crs;
+    /* one of LIBXL_COLO_SETUPED / _SUSPENDED / _RESUMED */
+    int status;
+    /* false until the first LIBXL_COLO_SVM_READY has been sent */
+    bool preresume;
+    /* used for teardown */
+    /* set to 1 once colo_setup_checkpoint_devices() has started setup */
+    int teardown_devices;
+    /* rc handed to libxl__colo_restore_teardown(), kept across teardown */
+    int saved_rc;
+
+    /* continuation invoked when the current asynchronous step completes */
+    void (*callback)(libxl__egc *,
+                     libxl__colo_restore_checkpoint_state *,
+                     int);
+
+    /*
+     * Descriptions used as datacopier "copywhat" strings:
+     * 0: secondary vm's dirty bitmap for domain @domid
+     * 1: secondary vm is ready(domain @domid)
+     * 2: secondary vm is resumed(domain @domid)
+     * 3: new checkpoint is triggered(domain @domid)
+     */
+    const char *copywhat[4];
+};
+
+
+static void libxl__colo_restore_domain_resume_callback(void *data);
+static void libxl__colo_restore_domain_checkpoint_callback(void *data);
+static void libxl__colo_restore_domain_suspend_callback(void *data);
+
+static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = {
+    NULL,
+};
+
+/* ===================== colo: common functions ===================== */
+/*
+ * Turn on log-dirty tracking for the secondary vm.
+ *
+ * The caller must set crcs->lds.callback beforehand.  If the
+ * xc_shadow_control() call fails, the callback is invoked with
+ * ERROR_FAIL.  For an HVM guest the work continues in
+ * libxl__domain_common_switch_qemu_logdirty() (presumably it reports
+ * completion through the same lds callback -- confirm); otherwise the
+ * callback is invoked directly with 0.
+ */
+static void colo_enable_logdirty(libxl__colo_restore_state *crs, libxl__egc *egc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    const uint32_t domid = crs->domid;
+    libxl__logdirty_switch *const lds = &crcs->lds;
+
+    STATE_AO_GC(crs->ao);
+
+    /* we need to know which pages are dirty to restore the guest */
+    if (xc_shadow_control(CTX->xch, domid,
+                          XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+                          NULL, 0, NULL, 0, NULL) < 0) {
+        LOG(ERROR, "cannot enable secondary vm's logdirty");
+        lds->callback(egc, lds, ERROR_FAIL);
+        return;
+    }
+
+    /* HVM guests also need the device model to log dirty pages */
+    if (crs->hvm) {
+        libxl__domain_common_switch_qemu_logdirty(domid, 1, lds, egc);
+        return;
+    }
+
+    lds->callback(egc, lds, 0);
+}
+
+/*
+ * Turn off log-dirty tracking for the secondary vm.
+ *
+ * A failure of xc_shadow_control() is only logged as a warning; the
+ * function carries on regardless.  For an HVM guest the work continues
+ * in libxl__domain_common_switch_qemu_logdirty(); otherwise
+ * crcs->lds.callback (which the caller must have set) is invoked with 0.
+ */
+static void colo_disable_logdirty(libxl__colo_restore_state *crs,
+                                  libxl__egc *egc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    const uint32_t domid = crs->domid;
+    libxl__logdirty_switch *const lds = &crcs->lds;
+
+    STATE_AO_GC(crs->ao);
+
+    /* stop dirty page tracking in the hypervisor; failure is not fatal */
+    if (xc_shadow_control(CTX->xch, domid, XEN_DOMCTL_SHADOW_OP_OFF,
+                          NULL, 0, NULL, 0, NULL) < 0)
+        LOG(WARN, "cannot disable secondary vm's logdirty");
+
+    /* HVM guests also need the device model to stop logging */
+    if (crs->hvm) {
+        libxl__domain_common_switch_qemu_logdirty(domid, 0, lds, egc);
+        return;
+    }
+
+    lds->callback(egc, lds, 0);
+}
+
+/*
+ * Resume the secondary vm and report the result via crcs->callback.
+ *
+ * While crs->saved_cb is still set the domain has not been built yet,
+ * so libxl__xc_domain_restore_done() is called instead; its completion
+ * reaches crcs->callback through libxl__colo_domain_create_cb(), which
+ * libxl__colo_restore_setup() installed as dcs->callback.
+ *
+ * Once saved_cb has been cleared, the domain is resumed with
+ * libxl__domain_resume(), preceded by libxl__domain_restore() when
+ * @restore_device_model is non-zero.
+ */
+static void colo_resume_vm(libxl__egc *egc,
+                           libxl__colo_restore_checkpoint_state *crcs,
+                           int restore_device_model)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    int rc;
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+
+    STATE_AO_GC(crs->ao);
+
+    if (!crs->saved_cb) {
+        /* TODO: sync mmu for hvm? */
+        if (restore_device_model) {
+            rc = libxl__domain_restore(gc, crs->domid);
+            if (rc) {
+                LOG(ERROR, "cannot restore device model for secondary vm");
+                crcs->callback(egc, crcs, rc);
+                return;
+            }
+        }
+        rc = libxl__domain_resume(gc, crs->domid, 0);
+        if (rc)
+            LOG(ERROR, "cannot resume secondary vm");
+
+        /* success and failure are both reported through the callback */
+        crcs->callback(egc, crcs, rc);
+        return;
+    }
+
+    /*
+     * TODO: get store mfn and console mfn
+     *  We should call the callback restore_results in
+     *  xc_domain_restore() before resuming the guest.
+     */
+    libxl__xc_domain_restore_done(egc, dcs, 0, 0, 0);
+
+    return;
+}
+
+/*
+ * Placeholder for device subkind initialisation.  No subkind is
+ * supported yet, so this always returns 0.
+ */
+static int init_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+    /* init device subkind-specific state in the libxl ctx */
+    int rc;
+    STATE_AO_GC(cds->ao);
+
+    rc = 0;
+    return rc;
+}
+
+/*
+ * Placeholder counterpart of init_device_subkind(); currently there is
+ * nothing to clean up.
+ */
+static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+    /* cleanup device subkind-specific state in the libxl ctx */
+    STATE_AO_GC(cds->ao);
+}
+
+
+/* ================ colo: setup restore environment ================ */
+static void libxl__colo_domain_create_cb(libxl__egc *egc,
+                                         libxl__domain_create_state *dcs,
+                                         int rc, uint32_t domid);
+
+/*
+ * Initialise the suspend state used to suspend the secondary vm.
+ *
+ * The caller must have filled in dss2->ao and dss2->domid.  Returns 0
+ * on success, or ERROR_FAIL if the domain type cannot be determined.
+ */
+static int init_dss2(libxl__domain_suspend_state2 *dss2)
+{
+    int rc = ERROR_FAIL;
+    libxl_domain_type type;
+
+    STATE_AO_GC(dss2->ao);
+
+    type = libxl__domain_type(gc, dss2->domid);
+    if (type == LIBXL_DOMAIN_TYPE_INVALID)
+        goto out;
+
+    /* put every event source into its idle state */
+    libxl__xswait_init(&dss2->pvcontrol);
+    libxl__ev_evtchn_init(&dss2->guest_evtchn);
+    libxl__ev_xswatch_init(&dss2->guest_watch);
+    libxl__ev_time_init(&dss2->guest_timeout);
+
+    if (type == LIBXL_DOMAIN_TYPE_HVM)
+        dss2->hvm = 1;
+    else
+        dss2->hvm = 0;
+
+    /* -1 marks the event channel port and lock fd as not yet set */
+    dss2->guest_evtchn.port = -1;
+    dss2->guest_evtchn_lockfd = -1;
+    dss2->guest_responded = 0;
+    dss2->dm_savefile = libxl__device_model_savefile(gc, dss2->domid);
+    dss2->save_dm = 0;
+
+    /* Secondary vm is not created, so we cannot get evtchn port */
+
+    rc = 0;
+
+out:
+    return rc;
+}
+
+/*
+ * Prepare the COLO restore state for the secondary vm.
+ *
+ * Allocates the checkpoint state (crs->crcs) and the dirty bitmap,
+ * initialises the suspend state, installs the suspend/postcopy/checkpoint
+ * callbacks, and replaces dcs->callback with libxl__colo_domain_create_cb
+ * (the original is kept in crs->saved_cb).
+ *
+ * The result is always reported through crs->callback: 0 on success,
+ * ERROR_NOMEM if the bitmap cannot be allocated, ERROR_FAIL otherwise.
+ * On failure crcs->dirty_bitmap is left NULL.
+ */
+void libxl__colo_restore_setup(libxl__egc *egc,
+                               libxl__colo_restore_state *crs)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs;
+    DECLARE_HYPERCALL_BUFFER(unsigned long, dirty_bitmap);
+    int rc = ERROR_FAIL;
+    int bsize;
+
+    /* Convenience aliases */
+    libxl__srm_restore_autogen_callbacks *const callbacks =
+        &dcs->shs.callbacks.restore.a;
+    /* uint32_t so that the PRIu32 conversions below match the argument */
+    const uint32_t domid = crs->domid;
+
+    STATE_AO_GC(crs->ao);
+
+    GCNEW(crcs);
+    crs->crcs = crcs;
+    crcs->crs = crs;
+
+    /*
+     * NOTE(review): xc_domain_maximum_gpfn() reports the highest gpfn;
+     * confirm whether p2m_size should be that value plus one.
+     */
+    if (xc_domain_maximum_gpfn(CTX->xch, domid, &crcs->p2m_size) < 0) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    crcs->copywhat[0] =
+        GCSPRINTF("secondary vm's dirty bitmap for domain %"PRIu32, domid);
+    crcs->copywhat[1] =
+        GCSPRINTF("secondary vm is ready(domain %"PRIu32")", domid);
+    crcs->copywhat[2] =
+        GCSPRINTF("secondary vm is resumed(domain %"PRIu32")", domid);
+    crcs->copywhat[3] =
+        GCSPRINTF("new checkpoint is triggered(domain %"PRIu32")", domid);
+
+    bsize = bitmap_size(crcs->p2m_size);
+    dirty_bitmap = xc_hypercall_buffer_alloc_pages(CTX->xch, dirty_bitmap,
+                                                   NRPAGES(bsize));
+    if (!dirty_bitmap) {
+        rc = ERROR_NOMEM;
+        goto out;
+    }
+    memset(dirty_bitmap, 0, bsize);
+    /* keep a copy of the descriptor so the buffer outlives this function */
+    crcs->_dirty_bitmap = *HYPERCALL_BUFFER(dirty_bitmap);
+    crcs->dirty_bitmap = &crcs->_dirty_bitmap;
+
+    /* setup dss2 */
+    crcs->dss2.ao = ao;
+    crcs->dss2.domid = domid;
+    rc = init_dss2(&crcs->dss2);
+    if (rc)
+        goto err_init_dss2;
+
+    callbacks->suspend = libxl__colo_restore_domain_suspend_callback;
+    callbacks->postcopy = libxl__colo_restore_domain_resume_callback;
+    callbacks->checkpoint = libxl__colo_restore_domain_checkpoint_callback;
+
+    /*
+     * Secondary vm is running in colo mode, so we need to call
+     * libxl__xc_domain_restore_done() to create secondary vm.
+     * But we will exit in domain_create_cb(). So replace the
+     * callback here.
+     */
+    crs->saved_cb = dcs->callback;
+    dcs->callback = libxl__colo_domain_create_cb;
+    crcs->status = LIBXL_COLO_SETUPED;
+
+    logdirty_init(&crcs->lds);
+    crcs->lds.ao = ao;
+
+    rc = 0;
+    goto out;
+
+err_init_dss2:
+    xc_hypercall_buffer_free_pages(CTX->xch, dirty_bitmap, NRPAGES(bsize));
+    crcs->dirty_bitmap = NULL;
+out:
+    crs->callback(egc, crs, rc);
+}
+
+/*
+ * Stand-in for dcs->callback while COLO is active (installed by
+ * libxl__colo_restore_setup()).  It forwards the result to
+ * crcs->callback; @domid is not used.
+ */
+static void libxl__colo_domain_create_cb(libxl__egc *egc,
+                                         libxl__domain_create_state *dcs,
+                                         int rc, uint32_t domid)
+{
+    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+
+    crcs->callback(egc, crcs, rc);
+}
+
+
+/* ================ colo: teardown restore environment ================ */
+static void colo_restore_teardown_done(libxl__egc *egc,
+                                       libxl__checkpoint_devices_state *cds,
+                                       int rc);
+static void do_failover_done(libxl__egc *egc,
+                             libxl__colo_restore_checkpoint_state* crcs,
+                             int rc);
+static void colo_disable_logdirty_done(libxl__egc *egc,
+                                       libxl__logdirty_switch *lds,
+                                       int rc);
+
+/*
+ * Start failover: make the secondary vm run on its own.
+ *
+ * What has to be undone depends on crcs->status:
+ *  - LIBXL_COLO_SETUPED: log-dirty was never enabled, just resume.
+ *  - LIBXL_COLO_SUSPENDED / LIBXL_COLO_RESUMED: disable log-dirty first;
+ *    colo_disable_logdirty_done() continues from there.
+ * The caller must have set crcs->callback; it is called with ERROR_FAIL
+ * for an unknown status.
+ */
+static void do_failover(libxl__egc *egc, libxl__colo_restore_state *crs)
+{
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    const int status = crcs->status;
+    libxl__logdirty_switch *const lds = &crcs->lds;
+
+    STATE_AO_GC(crs->ao);
+
+    switch(status) {
+    case LIBXL_COLO_SETUPED:
+        /* We don't enable logdirty now */
+        colo_resume_vm(egc, crcs, 0);
+        return;
+    case LIBXL_COLO_SUSPENDED:
+    case LIBXL_COLO_RESUMED:
+        /* disable logdirty first */
+        lds->callback = colo_disable_logdirty_done;
+        colo_disable_logdirty(crs, egc);
+        return;
+    default:
+        LOG(ERROR, "invalid status: %d", status);
+        crcs->callback(egc, crcs, ERROR_FAIL);
+    }
+}
+
+/*
+ * Tear down the COLO restore environment.
+ *
+ * Frees the dirty bitmap if one was allocated, remembers @rc in
+ * crcs->saved_rc, and then tears down the checkpoint devices if they
+ * were set up (crcs->teardown_devices).  Either way execution continues
+ * in colo_restore_teardown_done().
+ */
+void libxl__colo_restore_teardown(libxl__egc *egc,
+                                  libxl__colo_restore_state *crs,
+                                  int rc)
+{
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    EGC_GC;
+
+    /*
+     * libxl__colo_restore_setup() leaves crcs->dirty_bitmap NULL when it
+     * fails, and the shadow declaration dereferences that pointer, so it
+     * must only be declared once the pointer is known to be valid.
+     */
+    if (crcs->dirty_bitmap) {
+        DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                        crcs->dirty_bitmap);
+        int bsize = bitmap_size(crcs->p2m_size);
+
+        if (dirty_bitmap)
+            xc_hypercall_buffer_free_pages(CTX->xch, dirty_bitmap,
+                                           NRPAGES(bsize));
+        /* make a repeated teardown harmless */
+        crcs->dirty_bitmap = NULL;
+    }
+
+    crcs->saved_rc = rc;
+    if (!crcs->teardown_devices) {
+        /* devices were never set up: skip straight to the done handler */
+        colo_restore_teardown_done(egc, &crs->cds, 0);
+        return;
+    }
+
+    crs->cds.callback = colo_restore_teardown_done;
+    libxl__checkpoint_devices_teardown(egc, &crs->cds);
+}
+
+/*
+ * Continuation of libxl__colo_restore_teardown().
+ *
+ * @rc is the device teardown result and is only logged.  What happens
+ * next is decided by crcs->saved_rc, the rc originally passed to
+ * teardown: 0 starts failover, anything else restores dcs->callback
+ * and reports that rc through crs->callback.
+ */
+static void colo_restore_teardown_done(libxl__egc *egc,
+                                       libxl__checkpoint_devices_state *cds,
+                                       int rc)
+{
+    libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+
+    EGC_GC;
+
+    if (rc)
+        LOG(ERROR, "COLO: failed to teardown device for guest with domid %u,"
+            " rc %d", cds->domid, rc);
+
+    if (crcs->teardown_devices)
+        cleanup_device_subkind(cds);
+
+    /* from here on rc is the reason teardown was requested */
+    rc = crcs->saved_rc;
+    if (!rc) {
+        crcs->callback = do_failover_done;
+        do_failover(egc, crs);
+        return;
+    }
+
+    /* put back the callback replaced in libxl__colo_restore_setup() */
+    if (crs->saved_cb) {
+        dcs->callback = crs->saved_cb;
+        crs->saved_cb = NULL;
+    }
+    crs->callback(egc, crs, rc);
+}
+
+/*
+ * Final step of failover: log a failure if there was one, restore the
+ * original dcs->callback if it is still saved, and report @rc through
+ * crs->callback.
+ */
+static void do_failover_done(libxl__egc *egc,
+                             libxl__colo_restore_checkpoint_state* crcs,
+                             int rc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+
+    STATE_AO_GC(crs->ao);
+
+    if (rc)
+        LOG(ERROR, "cannot do failover");
+
+    if (crs->saved_cb) {
+        dcs->callback = crs->saved_cb;
+        crs->saved_cb = NULL;
+    }
+
+    crs->callback(egc, crs, rc);
+}
+
+/*
+ * Failover continuation after log-dirty has been switched off.
+ *
+ * A failure to disable log-dirty is only logged.  If the secondary vm
+ * is currently suspended it is resumed (crcs->callback is then called
+ * by colo_resume_vm()); otherwise crcs->callback is called with 0.
+ */
+static void colo_disable_logdirty_done(libxl__egc *egc,
+                                       libxl__logdirty_switch *lds,
+                                       int rc)
+{
+    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+
+    STATE_AO_GC(lds->ao);
+
+    if (rc)
+        LOG(WARN, "cannot disable logdirty");
+
+    if (crcs->status == LIBXL_COLO_SUSPENDED) {
+        /*
+         * failover when reading state from master, so no need to
+         * call libxl__domain_restore().
+         */
+        colo_resume_vm(egc, crcs, 0);
+        return;
+    }
+
+    /* If we cannot disable logdirty, we still can do failover */
+    crcs->callback(egc, crcs, 0);
+}
+
+/*
+ * checkpoint callbacks are called in the following order:
+ * 1. resume
+ * 2. checkpoint
+ * 3. suspend
+ */
+static void colo_common_read_send_data_done(libxl__egc *egc,
+                                            libxl__datacopier_state *dc,
+                                            int onwrite, int errnoval);
+/* ===================== colo: resume secondary vm ===================== */
+/*
+ * Do the following things when resuming secondary vm the first time:
+ *  1. resume secondary vm
+ *  2. enable log dirty
+ *  3. setup checkpoint devices
+ *  4. write LIBXL_COLO_SVM_READY
+ *  5. unpause secondary vm
+ *  6. write LIBXL_COLO_SVM_RESUMED
+ *
+ * Do the following things when resuming secondary vm:
+ *  1. write LIBXL_COLO_SVM_READY
+ *  2. resume secondary vm
+ *  3. write LIBXL_COLO_SVM_RESUMED
+ */
+static void colo_send_svm_ready(libxl__egc *egc,
+                                libxl__colo_restore_checkpoint_state *crcs);
+static void colo_send_svm_ready_done(libxl__egc *egc,
+                                     libxl__colo_restore_checkpoint_state 
*crcs,
+                                     int rc);
+static void colo_restore_preresume_cb(libxl__egc *egc,
+                                      libxl__checkpoint_devices_state *cds,
+                                      int rc);
+static void colo_restore_resume_vm(libxl__egc *egc,
+                                   libxl__colo_restore_checkpoint_state *crcs);
+static void colo_resume_vm_done(libxl__egc *egc,
+                                libxl__colo_restore_checkpoint_state *crcs,
+                                int rc);
+static void colo_write_svm_resumed(libxl__egc *egc,
+                                   libxl__colo_restore_checkpoint_state *crcs);
+static void colo_enable_logdirty_done(libxl__egc *egc,
+                                      libxl__logdirty_switch *lds,
+                                      int retval);
+static void colo_reenable_logdirty(libxl__egc *egc,
+                                   libxl__logdirty_switch *lds,
+                                   int rc);
+static void colo_reenable_logdirty_done(libxl__egc *egc,
+                                        libxl__logdirty_switch *lds,
+                                        int rc);
+static void colo_setup_checkpoint_devices(libxl__egc *egc,
+                                          libxl__colo_restore_state *crs);
+static void colo_restore_setup_cds_done(libxl__egc *egc,
+                                        libxl__checkpoint_devices_state *cds,
+                                        int rc);
+static void colo_unpause_svm(libxl__egc *egc,
+                             libxl__colo_restore_checkpoint_state *crcs);
+
+/*
+ * "postcopy" callback installed by libxl__colo_restore_setup(): entry
+ * point of the resume phase.
+ *
+ * crcs->teardown_devices is only set once the checkpoint devices have
+ * been set up, which happens during the first resume.  So if it is set
+ * this is a later resume and LIBXL_COLO_SVM_READY is sent first;
+ * otherwise this is the first resume and the vm is resumed directly.
+ */
+static void libxl__colo_restore_domain_resume_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs);
+    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+
+    if (crcs->teardown_devices)
+        colo_send_svm_ready(shs->egc, crcs);
+    else
+        colo_restore_resume_vm(shs->egc, crcs);
+}
+
+/*
+ * Send the one-byte LIBXL_COLO_SVM_READY section to master over
+ * crs->send_fd.  Completion is delivered to colo_send_svm_ready_done()
+ * via crcs->callback.  If the datacopier cannot be started, 0 is
+ * passed to libxl__xc_domain_saverestore_async_callback_done().
+ */
+static void colo_send_svm_ready(libxl__egc *egc,
+                               libxl__colo_restore_checkpoint_state *crcs)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    uint8_t section = LIBXL_COLO_SVM_READY;
+    int rc;
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+    const int send_fd = crs->send_fd;
+    libxl__datacopier_state *const dc = &crcs->dc;
+    libxl__save_helper_state *const shs = &dcs->shs;
+
+    STATE_AO_GC(crs->ao);
+
+    /* write-only copier: nothing is read, only the prefix data is sent */
+    memset(dc, 0, sizeof(*dc));
+    dc->ao = ao;
+    dc->readfd = -1;
+    dc->writefd = send_fd;
+    dc->maxsz = INT_MAX;
+    dc->copywhat = crcs->copywhat[1];
+    dc->writewhat = "colo stream";
+    dc->callback = colo_common_read_send_data_done;
+    crcs->callback = colo_send_svm_ready_done;
+
+    rc = libxl__datacopier_start(dc);
+    if (rc) {
+        LOG(ERROR, "libxl__datacopier_start() fails");
+        goto out;
+    }
+
+    /* tell master that secondary vm is ready */
+    libxl__datacopier_prefixdata(egc, dc, &section, sizeof(section));
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/*
+ * Continuation after LIBXL_COLO_SVM_READY has been sent.
+ *
+ * The first time (crcs->preresume still false) the secondary vm is
+ * unpaused; on every later call the checkpoint devices are put through
+ * their preresume step instead.
+ *
+ * NOTE(review): @rc is not examined here; presumably
+ * colo_common_read_send_data_done() deals with failures before calling
+ * this -- confirm.
+ */
+static void colo_send_svm_ready_done(libxl__egc *egc,
+                                     libxl__colo_restore_checkpoint_state *crcs,
+                                     int rc)
+{
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *cds = &crcs->crs->cds;
+
+    if (!crcs->preresume) {
+        crcs->preresume = true;
+        colo_unpause_svm(egc, crcs);
+        return;
+    }
+
+    cds->callback = colo_restore_preresume_cb;
+    libxl__checkpoint_devices_preresume(egc, cds);
+}
+
+/*
+ * Completion of libxl__checkpoint_devices_preresume(): on success go
+ * on to resume the secondary vm, on failure pass 0 to
+ * libxl__xc_domain_saverestore_async_callback_done().
+ */
+static void colo_restore_preresume_cb(libxl__egc *egc,
+                                      libxl__checkpoint_devices_state *cds,
+                                      int rc)
+{
+    libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    libxl__save_helper_state *const shs = &dcs->shs;
+
+    STATE_AO_GC(crs->ao);
+
+    if (rc) {
+        LOG(ERROR, "preresume fails");
+        goto out;
+    }
+
+    colo_restore_resume_vm(egc, crcs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/*
+ * Resume the secondary vm as part of the checkpoint loop, asking
+ * colo_resume_vm() to restore the device model too; the result goes
+ * to colo_resume_vm_done().
+ */
+static void colo_restore_resume_vm(libxl__egc *egc,
+                                   libxl__colo_restore_checkpoint_state *crcs)
+{
+
+    crcs->callback = colo_resume_vm_done;
+    colo_resume_vm(egc, crcs, 1);
+}
+
+/*
+ * Continuation after the secondary vm has been resumed (or built).
+ *
+ * On success the status becomes LIBXL_COLO_RESUMED.  If crs->saved_cb
+ * is still set this was the first resume: the original dcs->callback is
+ * restored and log-dirty is enabled, continuing in
+ * colo_enable_logdirty_done().  Otherwise LIBXL_COLO_SVM_RESUMED is
+ * written to master.  On failure 0 is passed to
+ * libxl__xc_domain_saverestore_async_callback_done().
+ */
+static void colo_resume_vm_done(libxl__egc *egc,
+                                libxl__colo_restore_checkpoint_state *crcs,
+                                int rc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+    libxl__logdirty_switch *const lds = &crcs->lds;
+    libxl__save_helper_state *const shs = &dcs->shs;
+
+    STATE_AO_GC(crs->ao);
+
+    if (rc) {
+        LOG(ERROR, "cannot resume secondary vm");
+        goto out;
+    }
+
+    crcs->status = LIBXL_COLO_RESUMED;
+
+    /* avoid calling libxl__xc_domain_restore_done() more than once */
+    if (crs->saved_cb) {
+        dcs->callback = crs->saved_cb;
+        crs->saved_cb = NULL;
+
+        lds->callback = colo_enable_logdirty_done;
+        colo_enable_logdirty(crs, egc);
+        return;
+    }
+
+    colo_write_svm_resumed(egc, crcs);
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/*
+ * Send the one-byte LIBXL_COLO_SVM_RESUMED section to master over
+ * crs->send_fd.  crcs->callback is cleared, so no per-step
+ * continuation is registered for this write.  If the datacopier cannot
+ * be started, 0 is passed to
+ * libxl__xc_domain_saverestore_async_callback_done().
+ */
+static void colo_write_svm_resumed(libxl__egc *egc,
+                                   libxl__colo_restore_checkpoint_state *crcs)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    uint8_t section = LIBXL_COLO_SVM_RESUMED;
+    int rc;
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+    const int send_fd = crs->send_fd;
+    libxl__datacopier_state *const dc = &crcs->dc;
+    libxl__save_helper_state *const shs = &dcs->shs;
+
+    STATE_AO_GC(crs->ao);
+
+    /* write-only copier: nothing is read, only the prefix data is sent */
+    memset(dc, 0, sizeof(*dc));
+    dc->ao = ao;
+    dc->readfd = -1;
+    dc->writefd = send_fd;
+    dc->maxsz = INT_MAX;
+    dc->copywhat = crcs->copywhat[2];
+    dc->writewhat = "colo stream";
+    dc->callback = colo_common_read_send_data_done;
+    crcs->callback = NULL;
+
+    rc = libxl__datacopier_start(dc);
+    if (rc) {
+        LOG(ERROR, "libxl__datacopier_start() fails");
+        goto out;
+    }
+
+    /* tell master that secondary vm is resumed */
+    libxl__datacopier_prefixdata(egc, dc, &section, sizeof(section));
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/*
+ * Result of the first attempt to enable log-dirty.  On success the
+ * checkpoint devices are set up next.  On failure log-dirty is
+ * disabled and then enabled again via colo_reenable_logdirty().
+ */
+static void colo_enable_logdirty_done(libxl__egc *egc,
+                                      libxl__logdirty_switch *lds,
+                                      int rc)
+{
+    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+
+    STATE_AO_GC(crs->ao);
+
+    if (rc) {
+        /*
+         * log-dirty already enabled? There's no test op,
+         * so attempt to disable then reenable it
+         */
+        lds->callback = colo_reenable_logdirty;
+        colo_disable_logdirty(crs, egc);
+        return;
+    }
+
+    colo_setup_checkpoint_devices(egc, crs);
+}
+
+/*
+ * Second half of the disable-then-enable retry: called once log-dirty
+ * has been disabled.  If that failed, give up and pass 0 to
+ * libxl__xc_domain_saverestore_async_callback_done(); otherwise try to
+ * enable log-dirty again, continuing in colo_reenable_logdirty_done().
+ */
+static void colo_reenable_logdirty(libxl__egc *egc,
+                                   libxl__logdirty_switch *lds,
+                                   int rc)
+{
+    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__colo_restore_state *const crs = crcs->crs;
+    libxl__save_helper_state *const shs = &dcs->shs;
+
+    STATE_AO_GC(crs->ao);
+
+    if (rc) {
+        LOG(ERROR, "cannot enable logdirty");
+        goto out;
+    }
+
+    lds->callback = colo_reenable_logdirty_done;
+    colo_enable_logdirty(crs, egc);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/*
+ * Result of the second attempt to enable log-dirty.  On success the
+ * checkpoint devices are set up next; on failure 0 is passed to
+ * libxl__xc_domain_saverestore_async_callback_done().
+ */
+static void colo_reenable_logdirty_done(libxl__egc *egc,
+                                        libxl__logdirty_switch *lds,
+                                        int rc)
+{
+    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds);
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__save_helper_state *const shs = &dcs->shs;
+
+    STATE_AO_GC(crcs->crs->ao);
+
+    if (rc) {
+        LOG(ERROR, "cannot enable logdirty");
+        goto out;
+    }
+
+    colo_setup_checkpoint_devices(egc, crcs->crs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/*
+ * Set up the checkpoint devices for the secondary vm; the result is
+ * delivered to colo_restore_setup_cds_done().
+ *
+ * We cannot setup checkpoint devices in libxl__colo_restore_setup(),
+ * because the guest is not ready.
+ *
+ * crcs->teardown_devices is set before setup starts, so that
+ * libxl__colo_restore_teardown() tears the devices down afterwards.
+ */
+static void colo_setup_checkpoint_devices(libxl__egc *egc,
+                                          libxl__colo_restore_state *crs)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *cds = &crs->cds;
+    libxl__save_helper_state *const shs = &dcs->shs;
+
+    STATE_AO_GC(crs->ao);
+
+    /* TODO: disk/nic support */
+    cds->device_kind_flags = 0;
+    cds->callback = colo_restore_setup_cds_done;
+    cds->ao = ao;
+    cds->domid = crs->domid;
+    cds->ops = colo_restore_ops;
+
+    if (init_device_subkind(cds))
+        goto out;
+
+    crcs->teardown_devices = 1;
+
+    libxl__checkpoint_devices_setup(egc, cds);
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/*
+ * Completion of libxl__checkpoint_devices_setup(): on success tell
+ * master that the secondary vm is ready, on failure pass 0 to
+ * libxl__xc_domain_saverestore_async_callback_done().
+ */
+static void colo_restore_setup_cds_done(libxl__egc *egc,
+                                        libxl__checkpoint_devices_state *cds,
+                                        int rc)
+{
+    libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    libxl__save_helper_state *const shs = &dcs->shs;
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "COLO: failed to setup device for guest with domid %u",
+            cds->domid);
+        goto out;
+    }
+
+    colo_send_svm_ready(egc, crcs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/*
+ * Unpause the secondary vm for the first time, then write
+ * LIBXL_COLO_SVM_RESUMED to master.  If unpausing fails, 0 is passed
+ * to libxl__xc_domain_saverestore_async_callback_done().
+ */
+static void colo_unpause_svm(libxl__egc *egc,
+                             libxl__colo_restore_checkpoint_state *crcs)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    int rc;
+
+    /* Convenience aliases */
+    const uint32_t domid = crcs->crs->domid;
+    libxl__save_helper_state *const shs = &dcs->shs;
+
+    STATE_AO_GC(crcs->crs->ao);
+
+    /* We have enabled secondary vm's logdirty, so we can unpause it now */
+    rc = libxl__domain_unpause(gc, domid);
+    if (rc) {
+        LOG(ERROR, "cannot unpause secondary vm");
+        goto out;
+    }
+
+    colo_write_svm_resumed(egc, crcs);
+
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+
+/* ===================== colo: wait new checkpoint ===================== */
+static void colo_restore_commit_cb(libxl__egc *egc,
+                                   libxl__checkpoint_devices_state *cds,
+                                   int rc);
+static void colo_stream_read_done(libxl__egc *egc,
+                                  libxl__colo_restore_checkpoint_state *crcs,
+                                  int real_size);
+
+/*
+ * "checkpoint" callback.  data is the libxl__save_helper_state embedded
+ * in a libxl__domain_create_state.
+ *
+ * Starts libxl__checkpoint_devices_commit(); colo_restore_commit_cb()
+ * is installed as its completion callback.  No egc is passed in, so
+ * the one stored in shs->egc is used.
+ */
+static void libxl__colo_restore_domain_checkpoint_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs);
+
+    dcs->crs.cds.callback = colo_restore_commit_cb;
+    libxl__checkpoint_devices_commit(shs->egc, &dcs->crs.cds);
+}
+
+/*
+ * Completion callback of libxl__checkpoint_devices_commit().
+ *
+ * On success a datacopier is started that reads a single byte from the
+ * colo stream (crs.recv_fd) into crcs->section.  The result is delivered
+ * through colo_common_read_send_data_done(), which in turn invokes
+ * crcs->callback, set here to colo_stream_read_done().
+ *
+ * If rc is non-zero, or the datacopier cannot be started, the
+ * save/restore helper is answered with 0.
+ */
+static void colo_restore_commit_cb(libxl__egc *egc,
+                                   libxl__checkpoint_devices_state *cds,
+                                   int rc)
+{
+    libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+
+    /* Convenience aliases */
+    const int recv_fd = dcs->crs.recv_fd;
+    libxl__save_helper_state *const shs = &dcs->shs;
+    libxl__datacopier_state *const dc = &crcs->dc;
+
+    STATE_AO_GC(cds->ao);
+
+    if (rc) {
+        LOG(ERROR, "commit fails");
+        goto out;
+    }
+
+    /* Read-only copy: one byte from recv_fd into crcs->section */
+    memset(dc, 0, sizeof(*dc));
+    dc->ao = ao;
+    dc->readfd = recv_fd;
+    dc->writefd = -1;
+    dc->maxsz = INT_MAX;
+    dc->copywhat = crcs->copywhat[3];
+    dc->readwhat = "colo stream";
+    dc->callback = colo_common_read_send_data_done;
+    dc->readbuf = &crcs->section;
+    dc->bytes_to_read = 1;
+    crcs->callback = colo_stream_read_done;
+
+    rc = libxl__datacopier_start(dc);
+    if (rc) {
+        LOG(ERROR, "libxl__datacopier_start() fails");
+        goto out;
+    }
+
+    return;
+
+out:
+    /*
+     * Use the egc this callback was invoked with, like every other
+     * error path in this file, rather than relying on shs->egc still
+     * being set at this point.
+     */
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0);
+}
+
+/*
+ * Called (as crcs->callback) once the one-byte read from the colo
+ * stream has finished; real_size is the number of bytes read.
+ *
+ * The helper is answered with 1 only if exactly one byte was read and
+ * it is LIBXL_COLO_NEW_CHECKPOINT; otherwise with 0.
+ */
+static void colo_stream_read_done(libxl__egc *egc,
+                                  libxl__colo_restore_checkpoint_state *crcs,
+                                  int real_size)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    int ok;
+
+    STATE_AO_GC(dcs->ao);
+
+    if (real_size != 1) {
+        LOG(ERROR, "reading data fails: %lld", (long long)real_size);
+        ok = 0;
+    } else if (crcs->section != LIBXL_COLO_NEW_CHECKPOINT) {
+        LOG(ERROR, "invalid section: %d", crcs->section);
+        ok = 0;
+    } else {
+        ok = 1;
+    }
+
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->shs, ok);
+}
+
+
+/* ===================== colo: suspend secondary vm ===================== */
+/*
+ * Do the following things when suspending secondary vm:
+ *  1. suspend secondary vm
+ *  2. get secondary vm's dirty page information
+ *  3. send LIBXL_COLO_SVM_SUSPENDED
+ *  4. send secondary vm's dirty page information(count + pfn list)
+ */
+static void colo_suspend_vm_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state2 *dss2,
+                                 int ok);
+static void colo_restore_postsuspend_cb(libxl__egc *egc,
+                                        libxl__checkpoint_devices_state *cds,
+                                        int rc);
+static void colo_append_pfn_type(libxl__egc *egc,
+                                 libxl__datacopier_state *dc,
+                                 unsigned long *dirty_bitmap,
+                                 unsigned long p2m_size);
+
+/*
+ * "suspend" callback.  data is the libxl__save_helper_state embedded in
+ * a libxl__domain_create_state.
+ *
+ * Starts suspending the secondary vm through libxl__domain_suspend2();
+ * colo_suspend_vm_done() is called when that has finished.  No egc is
+ * passed in, so the one stored in shs->egc is used.
+ */
+static void libxl__colo_restore_domain_suspend_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs);
+    libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs;
+
+    STATE_AO_GC(dcs->ao);
+
+    /* suspend secondary vm */
+    crcs->dss2.callback_common_done = colo_suspend_vm_done;
+    libxl__domain_suspend2(shs->egc, &crcs->dss2);
+}
+
+/*
+ * Completion callback of libxl__domain_suspend2() (installed as
+ * dss2->callback_common_done).
+ *
+ * ok != 0: record LIBXL_COLO_SUSPENDED in crcs->status and run the
+ *          checkpoint devices' postsuspend step, which completes in
+ *          colo_restore_postsuspend_cb().
+ * ok == 0: log the failure and answer the save/restore helper with 0.
+ */
+static void colo_suspend_vm_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state2 *dss2,
+                                 int ok)
+{
+    libxl__colo_restore_checkpoint_state *crcs =
+        CONTAINER_OF(dss2, *crcs, dss2);
+    libxl__colo_restore_state *crs = crcs->crs;
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *cds = &crs->cds;
+
+    STATE_AO_GC(crs->ao);
+
+    if (!ok) {
+        LOG(ERROR, "cannot suspend secondary vm");
+        goto out;
+    }
+
+    crcs->status = LIBXL_COLO_SUSPENDED;
+
+    cds->callback = colo_restore_postsuspend_cb;
+    libxl__checkpoint_devices_postsuspend(egc, cds);
+
+    return;
+
+out:
+    /* Only reached on failure, so the reply is always 0 */
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->shs, 0);
+}
+
+/*
+ * Completion callback of libxl__checkpoint_devices_postsuspend().
+ *
+ * On success:
+ *  1. fetch (and clean) the secondary vm's dirty bitmap with
+ *     XEN_DOMCTL_SHADOW_OP_CLEAN;
+ *  2. count the dirty pfns;
+ *  3. queue on crs->send_fd: LIBXL_COLO_SVM_SUSPENDED (1 byte), the
+ *     count (uint64_t) and then the pfn list.
+ * Completion of the write is reported by
+ * colo_common_read_send_data_done(); crcs->callback is NULL, so no
+ * further step is chained after it.
+ *
+ * On any failure the save/restore helper is answered with 0.
+ */
+static void colo_restore_postsuspend_cb(libxl__egc *egc,
+                                        libxl__checkpoint_devices_state *cds,
+                                        int rc)
+{
+    libxl__colo_restore_state *crs = CONTAINER_OF(cds, *crs, cds);
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    libxl__colo_restore_checkpoint_state *crcs = crs->crcs;
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    crcs->dirty_bitmap);
+    uint8_t section = LIBXL_COLO_SVM_SUSPENDED;
+    unsigned long i;   /* same type as p2m_size, which bounds the loop */
+    int ok = 0;
+    uint64_t count;
+
+    /* Convenience aliases */
+    const int send_fd = crs->send_fd;
+    const unsigned long p2m_size = crcs->p2m_size;
+    const uint32_t domid = crs->domid;
+    libxl__datacopier_state *const dc = &crcs->dc;
+
+    STATE_AO_GC(crs->ao);
+
+    if (rc) {
+        LOG(ERROR, "postsuspend fails");
+        goto out;
+    }
+
+    /*
+     * Secondary vm is running, so there are some dirty pages
+     * that are non-dirty in master. Get dirty bitmap and
+     * send it to master.
+     */
+    if (xc_shadow_control(CTX->xch, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
+                          HYPERCALL_BUFFER(dirty_bitmap), p2m_size,
+                          NULL, 0, NULL) != p2m_size) {
+        LOG(ERROR, "getting secondary vm's dirty bitmap fails");
+        goto out;
+    }
+
+    count = 0;
+    for (i = 0; i < p2m_size; i++) {
+        if (test_bit(i, dirty_bitmap))
+            count++;
+    }
+
+    /* Write-only copy: everything is queued with prefixdata below */
+    memset(dc, 0, sizeof(*dc));
+    dc->ao = ao;
+    dc->readfd = -1;
+    dc->writefd = send_fd;
+    dc->maxsz = INT_MAX;
+    dc->copywhat = crcs->copywhat[0];
+    dc->writewhat = "colo stream";
+    dc->callback = colo_common_read_send_data_done;
+    crcs->callback = NULL;
+
+    rc = libxl__datacopier_start(dc);
+    if (rc) {
+        LOG(ERROR, "libxl__datacopier_start() fails");
+        goto out;
+    }
+
+    /* tell master that secondary vm is suspended */
+    libxl__datacopier_prefixdata(egc, dc, &section, sizeof(section));
+
+    /* send dirty pages to master */
+    libxl__datacopier_prefixdata(egc, dc, &count, sizeof(count));
+    colo_append_pfn_type(egc, dc, dirty_bitmap, p2m_size);
+    return;
+
+out:
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->shs, ok);
+}
+
+/*
+ * Queue the list of dirty pfns on the datacopier dc.
+ *
+ * Every index below p2m_size whose bit is set in dirty_bitmap is
+ * written as a uint64_t.  The pfns are queued in batches of at most
+ * sizeof(libxl__datacopier_buf.buf) bytes per
+ * libxl__datacopier_prefixdata() call.
+ */
+static void colo_append_pfn_type(libxl__egc *egc,
+                                 libxl__datacopier_state *dc,
+                                 unsigned long *dirty_bitmap,
+                                 unsigned long p2m_size)
+{
+    unsigned long i;   /* same type as p2m_size, which bounds the loop */
+    int count;
+    /* Hack, buf->buf is private member... (only used inside sizeof) */
+    libxl__datacopier_buf *buf = NULL;
+    int max_batch = sizeof(buf->buf) / sizeof(uint64_t);
+    int buf_size = max_batch * sizeof(uint64_t);
+    uint64_t *pfn;
+
+    STATE_AO_GC(dc->ao);
+
+    /* Allocated with NOGC, so it is released with free() below */
+    pfn = libxl__zalloc(NOGC, buf_size);
+
+    count = 0;
+    for (i = 0; i < p2m_size; i++) {
+        if (!test_bit(i, dirty_bitmap))
+            continue;
+
+        pfn[count++] = i;
+        if (count == max_batch) {
+            /* Batch is full: queue it and start filling again */
+            libxl__datacopier_prefixdata(egc, dc, pfn, buf_size);
+            count = 0;
+        }
+    }
+
+    /* Queue the last, partially filled batch */
+    if (count)
+        libxl__datacopier_prefixdata(egc, dc, pfn, count * sizeof(uint64_t));
+
+    free(pfn);
+}
+
+
+/* ===================== colo: common callback ===================== */
+/*
+ * Datacopier callback shared by the colo read and send steps.
+ *
+ * The save/restore helper is answered with:
+ *   0  when onwrite == -1;
+ *   2  when errnoval < 0, or a write (onwrite == 1) reported an error
+ *      (original note: "do failover?");
+ *   1  when there was no error and no crcs->callback is set.
+ * Otherwise crcs->callback is invoked and decides what happens next; it
+ * receives dc->used for a read (onwrite == 0) and 0 in other cases.
+ */
+static void colo_common_read_send_data_done(libxl__egc *egc,
+                                            libxl__datacopier_state *dc,
+                                            int onwrite, int errnoval)
+{
+    libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(dc, *crcs, dc);
+    libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs);
+    int ok;
+    STATE_AO_GC(dc->ao);
+
+    if (onwrite == -1) {
+        LOG(ERROR, "reading/sending data fails");
+        ok = 0;
+    } else if (errnoval < 0 || (onwrite == 1 && errnoval)) {
+        /* failure happens when reading/writing, do failover? */
+        ok = 2;
+    } else if (!crcs->callback) {
+        /* Everything is OK */
+        ok = 1;
+    } else {
+        /* Hand the result over to the step-specific callback */
+        if (onwrite == 0)
+            crcs->callback(egc, crcs, dc->used);
+        else
+            crcs->callback(egc, crcs, 0);
+        return;
+    }
+
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->shs, ok);
+}
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 392420f..ba6e1fe 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -19,6 +19,7 @@
 
 #include "libxl_internal.h"
 #include "libxl_arch.h"
+#include "libxl_colo.h"
 
 #include <xc_dom.h>
 #include <xenguest.h>
@@ -956,6 +957,96 @@ static void domcreate_console_available(libxl__egc *egc,
                                         dcs->aop_console_how.for_event));
 }
 
+/*
+ * Installed as crs->callback by libxl__colo_restore_done() before it
+ * calls libxl__colo_restore_teardown().
+ *
+ * rc == 0 is a failover, so the secondary vm is not destroyed.
+ * Otherwise the domain is destroyed here, unless shs->retval (the
+ * return value of xc_domain_restore()) is non-zero: in that case the
+ * secondary vm has already been destroyed in xc_domain_restore().
+ *
+ * dcs->callback is always invoked at the end, with rc and the domid.
+ */
+static void libxl__colo_restore_teardown_done(libxl__egc *egc,
+                                              libxl__colo_restore_state *crs,
+                                              int rc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    STATE_AO_GC(crs->ao);
+
+    /* Destroy only after a failed colo run whose restore returned 0 */
+    if (rc && !dcs->shs.retval)
+        xc_domain_destroy(libxl__gc_owner(gc)->xch, crs->domid);
+
+    dcs->callback(egc, dcs, rc, crs->domid);
+}
+
+/*
+ * Completion callback of the restore helper when the stream is a COLO
+ * one.  dcs_void is the libxl__domain_create_state.
+ *
+ * ret == 0 && retval == 0 is treated as a failover (rc = 0); anything
+ * else as a colo failure (rc = 1).  errnoval is not used.  In both
+ * cases libxl__colo_restore_teardown() is started, with
+ * libxl__colo_restore_teardown_done() as its callback.
+ */
+void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void,
+                              int ret, int retval, int errnoval)
+{
+    libxl__domain_create_state *dcs = dcs_void;
+    libxl__colo_restore_state *const crs = &dcs->crs;
+    const int rc = (ret || retval) ? 1 : 0;
+    STATE_AO_GC(crs->ao);
+
+    LOG(INFO, "%s", rc ? "colo fails" : "failover");
+
+    /* teardown and failover */
+    crs->callback = libxl__colo_restore_teardown_done;
+    libxl__colo_restore_teardown(egc, crs, rc);
+}
+
+/*
+ * Installed as crs->callback by libxl__colo_restore_setup_done() before
+ * the restore helper is started.
+ *
+ * Translates rc into the helper's reply: 1 when rc == 0, else 0.
+ */
+static void libxl__colo_restore_cp_done(libxl__egc *egc,
+                                        libxl__colo_restore_state *crs,
+                                        int rc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    int ok = 0;
+
+    /* convenience aliases */
+    libxl__save_helper_state *const shs = &dcs->shs;
+
+    if (!rc)
+        ok = 1;
+
+    /*
+     * Reply with the egc this callback was invoked with instead of
+     * relying on shs->egc still being set at this point.
+     */
+    libxl__xc_domain_saverestore_async_callback_done(egc, shs, ok);
+}
+
+/*
+ * Installed as crs->callback before libxl__colo_restore_setup() is
+ * called.
+ *
+ * rc == 0: switch crs->callback to libxl__colo_restore_cp_done() and
+ *          start the restore with the hvm/pae/superpages settings
+ *          stored in crs.
+ * rc != 0: log the failure and finish through
+ *          libxl__xc_domain_restore_done().
+ */
+static void libxl__colo_restore_setup_done(libxl__egc *egc,
+                                           libxl__colo_restore_state *crs,
+                                           int rc)
+{
+    libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs);
+    STATE_AO_GC(crs->ao);
+
+    if (!rc) {
+        crs->callback = libxl__colo_restore_cp_done;
+        libxl__xc_domain_restore(egc, dcs,
+                                 crs->hvm, crs->pae, crs->superpages);
+        return;
+    }
+
+    LOG(ERROR, "colo restore setup fails: %d", rc);
+    libxl__xc_domain_restore_done(egc, dcs, rc, 0, 0);
+}
+
 static void domcreate_bootloader_done(libxl__egc *egc,
                                       libxl__bootloader_state *bl,
                                       int rc)
@@ -971,6 +1062,8 @@ static void domcreate_bootloader_done(libxl__egc *egc,
     libxl__domain_build_state *const state = &dcs->build_state;
     libxl__srm_restore_autogen_callbacks *const callbacks =
         &dcs->shs.callbacks.restore.a;
+    const int checkpointed_stream = dcs->checkpointed_stream;
+    libxl__colo_restore_state *const crs = &dcs->crs;
 
     if (rc) {
         domcreate_rebuild_done(egc, dcs, rc);
@@ -999,6 +1092,13 @@ static void domcreate_bootloader_done(libxl__egc *egc,
 
     /* Restore */
 
+    /* COLO only supports HVM now */
+    if (info->type != LIBXL_DOMAIN_TYPE_HVM &&
+        checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
     rc = libxl__build_pre(gc, domid, d_config, state);
     if (rc)
         goto out;
@@ -1021,8 +1121,20 @@ static void domcreate_bootloader_done(libxl__egc *egc,
         rc = ERROR_INVAL;
         goto out;
     }
-    libxl__xc_domain_restore(egc, dcs,
-                             hvm, pae, superpages);
+
+    if (checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) {
+        crs->ao = ao;
+        crs->domid = domid;
+        crs->send_fd = dcs->send_fd;
+        crs->recv_fd = restore_fd;
+        crs->hvm = hvm;
+        crs->superpages = superpages;
+        crs->pae = pae;
+        crs->callback = libxl__colo_restore_setup_done;
+        libxl__colo_restore_setup(egc, crs);
+    } else
+        libxl__xc_domain_restore(egc, dcs,
+                                 hvm, pae, superpages);
     return;
 
  out:
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index e09a1eb..0eb0f8c 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1114,7 +1114,7 @@ static void switch_logdirty_xswatch(libxl__egc *egc, 
libxl__ev_xswatch*,
 static void switch_logdirty_done(libxl__egc *egc,
                                  libxl__logdirty_switch *lds, int ok);
 
-static void logdirty_init(libxl__logdirty_switch *lds)
+void logdirty_init(libxl__logdirty_switch *lds)
 {
     lds->cmd_path = 0;
     libxl__ev_xswatch_init(&lds->watch);
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 971d975..686a39e 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2843,6 +2843,7 @@ struct libxl__logdirty_switch {
     libxl__ev_xswatch watch;
     libxl__ev_time timeout;
 };
+_hidden void logdirty_init(libxl__logdirty_switch *lds);
 
 /*
  * libxl__domain_suspend_state is for saving guest, not
@@ -3135,6 +3136,27 @@ typedef void libxl__domain_create_cb(libxl__egc *egc,
                                      libxl__domain_create_state*,
                                      int rc, uint32_t domid);
 
+/* colo related structure */
+typedef struct libxl__colo_restore_state libxl__colo_restore_state;
+typedef void libxl__colo_callback(libxl__egc *,
+                                  libxl__colo_restore_state *, int rc);
+struct libxl__colo_restore_state {
+    /* must be set by caller of libxl__colo_(setup|teardown) */
+    libxl__ao *ao;
+    uint32_t domid;                 /* secondary vm's domid */
+    int send_fd;                    /* fd used to write to the master */
+    int recv_fd;                    /* fd the colo stream is read from */
+    int hvm;                        /* hvm, pae and superpages are handed */
+    int pae;                        /* to libxl__xc_domain_restore() */
+    int superpages;
+    libxl__colo_callback *callback; /* completion callback, receives rc */
+
+    /* private, colo restore checkpoint state */
+    libxl__domain_create_cb *saved_cb;
+    void *crcs;                     /* libxl__colo_restore_checkpoint_state */
+    libxl__checkpoint_devices_state cds;
+};
+
 struct libxl__domain_create_state {
     /* filled in by user */
     libxl__ao *ao;
@@ -3148,6 +3170,7 @@ struct libxl__domain_create_state {
     int guest_domid;
     int checkpointed_stream;
     libxl__domain_build_state build_state;
+    libxl__colo_restore_state crs;
     libxl__bootloader_state bl;
     libxl__stub_dm_spawn_state dmss;
         /* If we're not doing stubdom, we use only dmss.dm,
diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c
index 477e633..9082b66 100644
--- a/tools/libxl/libxl_save_callout.c
+++ b/tools/libxl/libxl_save_callout.c
@@ -15,6 +15,7 @@
 #include "libxl_osdeps.h"
 
 #include "libxl_internal.h"
+#include "libxl_colo.h"
 
 /* stream_fd is as from the caller (eventually, the application).
  * It may be 0, 1 or 2, in which case we need to dup it elsewhere.
@@ -65,7 +66,10 @@ void libxl__xc_domain_restore(libxl__egc *egc, 
libxl__domain_create_state *dcs,
     dcs->shs.ao = ao;
     dcs->shs.domid = domid;
     dcs->shs.recv_callback = libxl__srm_callout_received_restore;
-    dcs->shs.completion_callback = libxl__xc_domain_restore_done;
+    if (dcs->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO)
+        dcs->shs.completion_callback = libxl__colo_restore_done;
+    else
+        dcs->shs.completion_callback = libxl__xc_domain_restore_done;
     dcs->shs.caller_state = dcs;
     dcs->shs.need_results = 1;
     dcs->shs.toolstack_data_file = 0;
diff --git a/tools/libxl/libxl_save_msgs_gen.pl 
b/tools/libxl/libxl_save_msgs_gen.pl
index 41ee000..0239cac 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -24,9 +24,9 @@ our @msgs = (
                                                  STRING doing_what),
                                                 'unsigned long', 'done',
                                                 'unsigned long', 'total'] ],
-    [  3, 'scxA',   "suspend", [] ],
-    [  4, 'scxA',   "postcopy", [] ],
-    [  5, 'scxA',   "checkpoint", [] ],
+    [  3, 'srcxA',   "suspend", [] ],
+    [  4, 'srcxA',   "postcopy", [] ],
+    [  5, 'srcxA',   "checkpoint", [] ],
     [  6, 'scxA',   "switch_qemu_logdirty",  [qw(int domid
                                               unsigned enable)] ],
     #                toolstack_save          done entirely `by hand'
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.