[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 25/28] tools/libx{c, l}: Introduce restore_callbacks.checkpoint()



Call the new callback whenever a checkpoint record is found in the libxc stream.

Some parts of this patch have been based on patches from the COLO
series.

Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Acked-by: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>

---
v3: Named constants for the API
v2: Borrow sufficient fragments from several COLO patches to get
    BROKEN_CHANNEL and checkpoint failover to function.
---
 tools/libxc/include/xenguest.h     |    7 +++++
 tools/libxc/xc_sr_common.h         |    7 +++--
 tools/libxc/xc_sr_restore.c        |   53 ++++++++++++++++++++++++++----------
 tools/libxl/libxl_save_msgs_gen.pl |    2 +-
 4 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 7581263..e95af54 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -102,6 +102,13 @@ struct restore_callbacks {
     int (*toolstack_restore)(uint32_t domid, const uint8_t *buf,
             uint32_t size, void* data);
 
+    /* A checkpoint record has been found in the stream.
+     * returns: */
+#define XGR_CHECKPOINT_ERROR    0 /* Terminate processing */
+#define XGR_CHECKPOINT_SUCCESS  1 /* Continue reading more data from the stream */
+#define XGR_CHECKPOINT_FAILOVER 2 /* Failover and resume VM */
+    int (*checkpoint)(void* data);
+
     /* to be provided as the last argument to each callback function */
     void* data;
 };
diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index 08c66db..1f4d4e4 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -130,10 +130,13 @@ struct xc_sr_restore_ops
      * Process an individual record from the stream.  The caller shall take
      * care of processing common records (e.g. END, PAGE_DATA).
      *
-     * @return 0 for success, -1 for failure, or the sentinel value
-     * RECORD_NOT_PROCESSED.
+     * @return 0 for success, -1 for failure, or the following sentinels:
+     *  - RECORD_NOT_PROCESSED
+     *  - BROKEN_CHANNEL: under Remus/COLO, this means master may be dead, and
+     *    a failover is needed.
      */
 #define RECORD_NOT_PROCESSED 1
+#define BROKEN_CHANNEL 2
     int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec);
 
     /**
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index 9e27dba..18ba411 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -1,5 +1,7 @@
 #include <arpa/inet.h>
 
+#include <assert.h>
+
 #include "xc_sr_common.h"
 
 /*
@@ -472,7 +474,7 @@ static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
 static int handle_checkpoint(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
-    int rc = 0;
+    int rc = 0, ret;
     unsigned i;
 
     if ( !ctx->restore.checkpointed )
@@ -482,6 +484,21 @@ static int handle_checkpoint(struct xc_sr_context *ctx)
         goto err;
     }
 
+    ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data);
+    switch ( ret )
+    {
+    case XGR_CHECKPOINT_SUCCESS:
+        break;
+
+    case XGR_CHECKPOINT_FAILOVER:
+        rc = BROKEN_CHANNEL;
+        goto err;
+
+    default: /* Other fatal error */
+        rc = -1;
+        goto err;
+    }
+
     if ( ctx->restore.buffer_all_records )
     {
         IPRINTF("All records buffered");
@@ -560,19 +577,6 @@ static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
     free(rec->data);
     rec->data = NULL;
 
-    if ( rc == RECORD_NOT_PROCESSED )
-    {
-        if ( rec->type & REC_TYPE_OPTIONAL )
-            DPRINTF("Ignoring optional record %#x (%s)",
-                    rec->type, rec_type_to_str(rec->type));
-        else
-        {
-            ERROR("Mandatory record %#x (%s) not handled",
-                  rec->type, rec_type_to_str(rec->type));
-            rc = -1;
-        }
-    }
-
     return rc;
 }
 
@@ -678,7 +682,22 @@ static int restore(struct xc_sr_context *ctx)
         else
         {
             rc = process_record(ctx, &rec);
-            if ( rc )
+            if ( rc == RECORD_NOT_PROCESSED )
+            {
+                if ( rec.type & REC_TYPE_OPTIONAL )
+                    DPRINTF("Ignoring optional record %#x (%s)",
+                            rec.type, rec_type_to_str(rec.type));
+                else
+                {
+                    ERROR("Mandatory record %#x (%s) not handled",
+                          rec.type, rec_type_to_str(rec.type));
+                    rc = -1;
+                    goto err;
+                }
+            }
+            else if ( rc == BROKEN_CHANNEL )
+                goto remus_failover;
+            else if ( rc )
                 goto err;
         }
 
@@ -735,6 +754,10 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
     ctx.restore.checkpointed = checkpointed_stream;
     ctx.restore.callbacks = callbacks;
 
+    /* Sanity checks for callbacks. */
+    if ( checkpointed_stream )
+        assert(callbacks->checkpoint);
+
     IPRINTF("In experimental %s", __func__);
     DPRINTF("fd %d, dom %u, hvm %u, pae %u, superpages %d"
             ", checkpointed_stream %d", io_fd, dom, hvm, pae,
diff --git a/tools/libxl/libxl_save_msgs_gen.pl b/tools/libxl/libxl_save_msgs_gen.pl
index 6b4b65e..825d5cc 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -25,7 +25,7 @@ our @msgs = (
                                                 'unsigned long', 'total'] ],
     [  3, 'scxA',   "suspend", [] ],
     [  4, 'scxA',   "postcopy", [] ],
-    [  5, 'scxA',   "checkpoint", [] ],
+    [  5, 'srcxA',  "checkpoint", [] ],
     [  6, 'scxA',   "switch_qemu_logdirty",  [qw(int domid
                                               unsigned enable)] ],
     #                toolstack_save          done entirely `by hand'
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.