[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC PATCH COLO v5 18/29] COLO: xc related codes



From: Wen Congyang <wency@xxxxxxxxxxxxxx>

Save:
1. send XC_SAVE_ID_LAST_CHECKPOINT, so secondary vm can be resumed
2. call callbacks->get_dirty_pfn() after suspending the primary vm if we
   are doing a checkpoint.

Restore:
1. call the resume/checkpoint/suspend callbacks if the secondary vm's
   status is the same as the primary vm's status.
2. zero out tdata because we will use it to zero out pagebuf.tdata.
3. don't apply the secondary vm's state when we fail to get the new
   secondary vm's state, because we have already applied it at every
   checkpoint.

Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
 tools/libxc/xc_domain_restore.c | 82 +++++++++++++++++++++++++++++++++++------
 tools/libxc/xc_domain_save.c    | 57 +++++++++++++++++++++++++++-
 2 files changed, 125 insertions(+), 14 deletions(-)

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index a382701..5cad21c 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -1454,7 +1454,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
     int nraces = 0;
 
     /* The new domain's shared-info frame number. */
-    unsigned long shared_info_frame;
+    unsigned long shared_info_frame = 0;
     unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
     shared_info_any_t *old_shared_info = 
         (shared_info_any_t *)shared_info_page;
@@ -1504,6 +1504,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
 
     DPRINTF("%s: starting restore of new domid %u", __func__, dom);
 
+    n = m = 0;
+
     pagebuf_init(&pagebuf);
     memset(&tailbuf, 0, sizeof(tailbuf));
     tailbuf.ishvm = hvm;
@@ -1629,7 +1631,6 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
      * We uncanonicalise page tables as we go.
      */
 
-    n = m = 0;
  loadpages:
     for ( ; ; )
     {
@@ -1793,26 +1794,45 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
         goto finish;
     }
 
+new_checkpoint:
     // DPRINTF("Buffered checkpoint\n");
 
     if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
         PERROR("error when buffering batch, finishing");
-        /*
-         * Remus: discard the current incomplete checkpoint and restore
-         * backup from the last complete checkpoint.
-         */
-        goto finish;
+        if ( callbacks && callbacks->checkpoint )
+        {
+            /* COLO: discard the current incomplete checkpoint */
+            rc = 0;
+            goto failover;
+        }
+        else
+        {
+            /*
+             * Remus: discard the current incomplete checkpoint and restore
+             * backup from the last complete checkpoint.
+             */
+            goto finish;
+        }
     }
     memset(&tmptail, 0, sizeof(tmptail));
     tmptail.ishvm = hvm;
     if ( buffer_tail(xch, ctx, &tmptail, io_fd, max_vcpu_id, vcpumap,
                      ext_vcpucontext, vcpuextstate_size) < 0 ) {
         ERROR ("error buffering image tail, finishing");
-        /*
-         * Remus: discard the current incomplete checkpoint and restore
-         * backup from the last complete checkpoint.
-         */
-        goto finish;
+        if ( callbacks && callbacks->checkpoint )
+        {
+            /* COLO: discard the current incomplete checkpoint */
+            rc = 0;
+            goto failover;
+        }
+        else
+        {
+            /*
+             * Remus: discard the current incomplete checkpoint and restore
+             * backup from the last complete checkpoint.
+             */
+            goto finish;
+        }
     }
     tailbuf_free(&tailbuf);
     memcpy(&tailbuf, &tmptail, sizeof(tailbuf));
@@ -2301,6 +2321,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
             free(tdata.data);
             goto out;
         }
+        memset(&tdata, 0, sizeof(tdata));
     }
 
     /* Dump the QEMU state to a state file for QEMU to load */
@@ -2368,6 +2389,43 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
     rc = 0;
 
  out:
+    if ( !rc && callbacks && callbacks->checkpoint )
+    {
+#define HANDLE_CALLBACK_RETURN_VALUE(frc)                   \
+    do {                                                    \
+        if ( frc == 0 )                                     \
+        {                                                   \
+            /* Some internal error happens */               \
+            rc = 1;                                         \
+            goto out;                                       \
+        }                                                   \
+        else if ( frc == 2 )                                \
+        {                                                   \
+            /* Reading/writing error, do failover */        \
+            rc = 0;                                         \
+            goto failover;                                  \
+        }                                                   \
+    } while (0)
+        /* COLO */
+
+        /* TODO: call restore_results */
+
+        /* Resume secondary vm */
+        frc = callbacks->postcopy(callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+        /* wait for new checkpoint */
+        frc = callbacks->checkpoint(callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+        /* suspend secondary vm */
+        frc = callbacks->suspend(callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+        goto new_checkpoint;
+    }
+
+failover:
     if ( (rc != 0) && (dom != 0) )
         xc_domain_destroy(xch, dom);
     xc_hypercall_buffer_free(xch, ctxt);
diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index 045a050..07a6560 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -377,6 +377,31 @@ static int suspend_and_state(int (*suspend)(void*), void* data,
     return 0;
 }
 
+/* Set the to_send bit of every pfn in the list returned by get_dirty_pfn();
+ * list[0] is the entry count. Returns 0, or -1 with errno = EINVAL. */
+static int update_dirty_bitmap(uint8_t *(*get_dirty_pfn)(void *), void *data,
+                               unsigned long p2m_size, unsigned long *to_send)
+{
+    uint64_t *pfn_list = (uint64_t *)get_dirty_pfn(data);
+    uint64_t count, i;
+    int rc = 0;
+
+    assert(pfn_list);
+    count = pfn_list[0];
+    for (i = 0; i < count && !rc; i++) {
+        /* pfn == p2m_size is rejected too: presumably one past the last
+         * bit of to_send -- confirm against the bitmap allocation. */
+        if (pfn_list[i + 1] >= p2m_size) {
+            errno = EINVAL;
+            rc = -1;
+        } else {
+            set_bit(pfn_list[i + 1], to_send);
+        }
+    }
+    free(pfn_list); /* released on the error path as well */
+    return rc;
+}
+
 /*
 ** Map the top-level page of MFNs from the guest. The guest might not have
 ** finished resuming from a previous restore operation, so we wait a while for
@@ -1773,11 +1798,14 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
         free(buf);
     }
 
-    if ( !callbacks->checkpoint )
+    if ( !callbacks->checkpoint || callbacks->get_dirty_pfn )
     {
         /*
          * If this is not a checkpointed save then this must be the first and
          * last checkpoint.
+         *
+         * If we are in colo mode, send last checkpoint to resume secondary
+         * vm.
          */
         i = XC_SAVE_ID_LAST_CHECKPOINT;
         if ( wrexact(io_fd, &i, sizeof(int)) )
@@ -2123,7 +2151,22 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
      * primary vm and secondary vm now.
      */
     if ( !rc && callbacks->postcopy && callbacks->get_dirty_pfn )
-        callbacks->postcopy(callbacks->data);
+    {
+        errno = 0;
+        if ( !callbacks->postcopy(callbacks->data) )
+        {
+            if (!errno)
+            {
+                errno = EIO;
+                ERROR("Postcopy request failed (without errno, using EINVAL)");
+            }
+            else
+            {
+                ERROR("Postcopy request failed");
+            }
+            rc = errno;
+        }
+    }
 
     /* Enable compression now, finally */
     compressing = (flags & XCFLAGS_CHECKPOINT_COMPRESS);
@@ -2152,6 +2195,16 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
             PERROR("Error flushing shadow PT");
         }
 
+        if ( callbacks->get_dirty_pfn )
+        {
+            if ( update_dirty_bitmap(callbacks->get_dirty_pfn, callbacks->data,
+                                     dinfo->p2m_size, to_send) )
+            {
+                ERROR("getting secondary vm's dirty pages failed");
+                goto out;
+            }
+        }
+
         goto copypages;
     }
 
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.