[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH Remus v6 3/3] libxc/restore: implement Remus checkpointed restore



With Remus, the restore flow should be:
the first full migration stream -> { periodically restore stream }

Signed-off-by: Yang Hongyang <yanghy@xxxxxxxxxxxxxx>
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 tools/libxc/xc_sr_common.h  |  15 +++++
 tools/libxc/xc_sr_restore.c | 133 +++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 135 insertions(+), 13 deletions(-)

diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index f8121e7..565c5da 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -208,6 +208,21 @@ struct xc_sr_context
             /* Plain VM, or checkpoints over time. */
             bool checkpointed;
 
+            /* Currently buffering records between a checkpoint */
+            bool buffer_all_records;
+
+/*
+ * With Remus, we buffer the records sent by the primary at checkpoint,
+ * in case the primary will fail, we can recover from the last
+ * checkpoint state.
+ * This should be enough for most of the cases because primary only send
+ * dirty pages at checkpoint.
+ */
+#define DEFAULT_BUF_RECORDS 1024
+            struct xc_sr_record *buffered_records;
+            unsigned allocated_rec_num;
+            unsigned buffered_rec_num;
+
             /*
              * Xenstore and Console parameters.
              * INPUT:  evtchn & domid
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index 9ab5760..fc47f43 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -468,6 +468,67 @@ static int handle_page_data(struct xc_sr_context *ctx, 
struct xc_sr_record *rec)
     return rc;
 }
 
+static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
+static int handle_checkpoint(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc = 0;
+    unsigned i;
+
+    if ( !ctx->restore.checkpointed )
+    {
+        ERROR("Found checkpoint in non-checkpointed stream");
+        rc = -1;
+        goto err;
+    }
+
+    if ( ctx->restore.buffer_all_records )
+    {
+        IPRINTF("All records buffered");
+
+        for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
+        {
+            rc = process_record(ctx, &ctx->restore.buffered_records[i]);
+            if ( rc )
+                goto err;
+        }
+        ctx->restore.buffered_rec_num = 0;
+        IPRINTF("All records processed");
+    }
+    else
+        ctx->restore.buffer_all_records = true;
+
+ err:
+    return rc;
+}
+
+static int buffer_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned new_alloc_num;
+    struct xc_sr_record *p;
+
+    if ( ctx->restore.buffered_rec_num >= ctx->restore.allocated_rec_num )
+    {
+        new_alloc_num = ctx->restore.allocated_rec_num + DEFAULT_BUF_RECORDS;
+        p = realloc(ctx->restore.buffered_records,
+                    new_alloc_num * sizeof(struct xc_sr_record));
+        if ( !p )
+        {
+            ERROR("Failed to realloc memory for buffered records");
+            return -1;
+        }
+
+        ctx->restore.buffered_records = p;
+        ctx->restore.allocated_rec_num = new_alloc_num;
+    }
+
+    memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
+           rec, sizeof(*rec));
+
+    return 0;
+}
+
 static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
 {
     xc_interface *xch = ctx->xch;
@@ -487,12 +548,17 @@ static int process_record(struct xc_sr_context *ctx, 
struct xc_sr_record *rec)
         ctx->restore.verify = true;
         break;
 
+    case REC_TYPE_CHECKPOINT:
+        rc = handle_checkpoint(ctx);
+        break;
+
     default:
         rc = ctx->restore.ops.process_record(ctx, rec);
         break;
     }
 
     free(rec->data);
+    rec->data = NULL;
 
     if ( rc == RECORD_NOT_PROCESSED )
     {
@@ -529,6 +595,15 @@ static int setup(struct xc_sr_context *ctx)
         goto err;
     }
 
+    ctx->restore.buffered_records = malloc(
+        DEFAULT_BUF_RECORDS * sizeof(struct xc_sr_record));
+    if ( !ctx->restore.buffered_records )
+    {
+        ERROR("Unable to allocate memory for buffered records");
+        rc = -1;
+        goto err;
+    }
+
  err:
     return rc;
 }
@@ -536,7 +611,12 @@ static int setup(struct xc_sr_context *ctx)
 static void cleanup(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
+    unsigned i;
+
+    for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
+        free(ctx->restore.buffered_records[i].data);
 
+    free(ctx->restore.buffered_records);
     free(ctx->restore.populated_pfns);
     if ( ctx->restore.ops.cleanup(ctx) )
         PERROR("Failed to clean up");
@@ -564,23 +644,50 @@ static int restore(struct xc_sr_context *ctx)
     {
         rc = read_record(ctx, &rec);
         if ( rc )
-            goto err;
-
-        rc = process_record(ctx, &rec);
-        if ( rc )
-            goto err;
-
-    } while ( rec.type != REC_TYPE_END );
+        {
+            if ( ctx->restore.buffer_all_records )
+                goto remus_failover;
+            else
+                goto err;
+        }
 
 #ifdef XG_LIBXL_HVM_COMPAT
-    if ( ctx->dominfo.hvm )
-    {
-        rc = read_qemu(ctx);
-        if ( rc )
-            goto err;
-    }
+        if ( ctx->dominfo.hvm &&
+             (rec.type == REC_TYPE_END || rec.type == REC_TYPE_CHECKPOINT) )
+        {
+            rc = read_qemu(ctx);
+            if ( rc )
+            {
+                if ( ctx->restore.buffer_all_records )
+                    goto remus_failover;
+                else
+                    goto err;
+            }
+        }
 #endif
 
+        if ( ctx->restore.buffer_all_records &&
+             rec.type != REC_TYPE_END &&
+             rec.type != REC_TYPE_CHECKPOINT )
+        {
+            rc = buffer_record(ctx, &rec);
+            if ( rc )
+                goto err;
+        }
+        else
+        {
+            rc = process_record(ctx, &rec);
+            if ( rc )
+                goto err;
+        }
+
+    } while ( rec.type != REC_TYPE_END );
+
+ remus_failover:
+    /*
+     * With Remus, if we reach here, there must be some error on primary,
+     * failover from the last checkpoint state.
+     */
     rc = ctx->restore.ops.stream_complete(ctx);
     if ( rc )
         goto err;
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.