[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC PATCH 2/7] xc_domain_restore: introduce restore_callbacks for colo



In colo mode, the SVM is also running, so we should update xc_restore to
support it. The first step is to add some callbacks for colo.

We add the following callbacks:
1. init(): init the private data used for colo
2. free(): free the resource we allocate and store in the private data
3. get_page(): the SVM is running, so we can't update the memory in
   apply_batch(). This callback returns a page buffer, and apply_batch()
   copies the page into this buffer. The buffer should initially hold the
   current content of the page, so that it can be used for verification.
4. flush_memory(): update the SVM memory and pagetable.
5. update_p2m(): update the SVM p2m page.
6. finish_restore(): wait for a new checkpoint.

We also add a new structure restore_data to avoid passing too many arguments
to these callbacks. This structure stores the variables used in
xc_domain_restore(), and these variables will be used in the callbacks.

Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx>
Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>

---
 tools/libxc/ia64/xc_ia64_linux_restore.c |   3 +-
 tools/libxc/xc_domain_restore.c          | 256 +++++++++++++++++++++----------
 tools/libxc/xenguest.h                   |  54 ++++++-
 tools/libxl/libxl_dom.c                  |   2 +-
 tools/xcutils/xc_restore.c               |   3 +-
 5 files changed, 230 insertions(+), 88 deletions(-)

diff --git a/tools/libxc/ia64/xc_ia64_linux_restore.c 
b/tools/libxc/ia64/xc_ia64_linux_restore.c
index b4e9e9c..ca76be6 100644
--- a/tools/libxc/ia64/xc_ia64_linux_restore.c
+++ b/tools/libxc/ia64/xc_ia64_linux_restore.c
@@ -550,7 +550,8 @@ int
 xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                   unsigned int store_evtchn, unsigned long *store_mfn,
                   unsigned int console_evtchn, unsigned long *console_mfn,
-                  unsigned int hvm, unsigned int pae, int superpages)
+                  unsigned int hvm, unsigned int pae, int superpages,
+                  struct restore_callbacks *callbacks)
 {
     DECLARE_DOMCTL;
     int rc = 1;
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 43e6c52..fa828e9 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -882,13 +882,15 @@ static int pagebuf_get(xc_interface *xch, struct 
restore_ctx *ctx,
 static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx 
*ctx,
                        xen_pfn_t* region_mfn, unsigned long* pfn_type, int 
pae_extended_cr3,
                        unsigned int hvm, struct xc_mmu* mmu,
-                       pagebuf_t* pagebuf, int curbatch)
+                       pagebuf_t* pagebuf, int curbatch,
+                       struct restore_callbacks *callbacks)
 {
     int i, j, curpage, nr_mfns;
     /* used by debug verify code */
     unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
     /* Our mapping of the current region (batch) */
     char *region_base;
+    char *target_buf;
     /* A temporary mapping, and a copy, of one frame of guest memory. */
     unsigned long *page = NULL;
     int nraces = 0;
@@ -954,16 +956,19 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
         }
     }
 
-    /* Map relevant mfns */
-    pfn_err = calloc(j, sizeof(*pfn_err));
-    region_base = xc_map_foreign_bulk(
-        xch, dom, PROT_WRITE, region_mfn, pfn_err, j);
-
-    if ( region_base == NULL )
+    if ( !callbacks || !callbacks->get_page)
     {
-        PERROR("map batch failed");
-        free(pfn_err);
-        return -1;
+        /* Map relevant mfns */
+        pfn_err = calloc(j, sizeof(*pfn_err));
+        region_base = xc_map_foreign_bulk(
+            xch, dom, PROT_WRITE, region_mfn, pfn_err, j);
+
+        if ( region_base == NULL )
+        {
+            PERROR("map batch failed");
+            free(pfn_err);
+            return -1;
+        }
     }
 
     for ( i = 0, curpage = -1; i < j; i++ )
@@ -975,7 +980,7 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
             /* a bogus/unmapped page: skip it */
             continue;
 
-        if (pfn_err[i])
+        if ( (!callbacks || !callbacks->get_page) && pfn_err[i] )
         {
             ERROR("unexpected PFN mapping failure");
             goto err_mapped;
@@ -993,8 +998,20 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
 
         mfn = ctx->p2m[pfn];
 
+        if ( callbacks && callbacks->get_page )
+        {
+            target_buf = callbacks->get_page(&callbacks->comm_data,
+                                             callbacks->data, pfn);
+            if ( !target_buf )
+            {
+                ERROR("Cannot get a buffer to store memory");
+                goto err_mapped;
+            }
+        }
+        else
+            target_buf = region_base + i*PAGE_SIZE;
         /* In verify mode, we use a copy; otherwise we work in place */
-        page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
+        page = pagebuf->verify ? (void *)buf : target_buf;
 
         memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, 
PAGE_SIZE);
 
@@ -1038,27 +1055,26 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
 
         if ( pagebuf->verify )
         {
-            int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
+            int res = memcmp(buf, target_buf, PAGE_SIZE);
             if ( res )
             {
                 int v;
 
                 DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
                         "actualcs=%08lx\n", pfn, pagebuf->pfn_types[pfn],
-                        csum_page(region_base + (i + curbatch)*PAGE_SIZE),
+                        csum_page(target_buf),
                         csum_page(buf));
 
                 for ( v = 0; v < 4; v++ )
                 {
-                    unsigned long *p = (unsigned long *)
-                        (region_base + i*PAGE_SIZE);
+                    unsigned long *p = (unsigned long *)target_buf;
                     if ( buf[v] != p[v] )
                         DPRINTF("    %d: %08lx %08lx\n", v, buf[v], p[v]);
                 }
             }
         }
 
-        if ( !hvm &&
+        if ( (!callbacks || !callbacks->get_page) && !hvm &&
              xc_add_mmu_update(xch, mmu,
                                (((unsigned long long)mfn) << PAGE_SHIFT)
                                | MMU_MACHPHYS_UPDATE, pfn) )
@@ -1071,8 +1087,11 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
     rc = nraces;
 
   err_mapped:
-    munmap(region_base, j*PAGE_SIZE);
-    free(pfn_err);
+    if ( !callbacks || !callbacks->get_page )
+    {
+        munmap(region_base, j*PAGE_SIZE);
+        free(pfn_err);
+    }
 
     return rc;
 }
@@ -1080,7 +1099,8 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
 int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn,
-                      unsigned int hvm, unsigned int pae, int superpages)
+                      unsigned int hvm, unsigned int pae, int superpages,
+                      struct restore_callbacks *callbacks)
 {
     DECLARE_DOMCTL;
     int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0;
@@ -1141,6 +1161,9 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
     static struct restore_ctx *ctx = &_ctx;
     struct domain_info_context *dinfo = &ctx->dinfo;
 
+    struct restore_data *comm_data = NULL;
+    void *data = NULL;
+
     pagebuf_init(&pagebuf);
     memset(&tailbuf, 0, sizeof(tailbuf));
     tailbuf.ishvm = hvm;
@@ -1249,6 +1272,32 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
         goto out;
     }
 
+    /* init callbacks->comm_data */
+    if ( callbacks )
+    {
+        callbacks->comm_data.xch = xch;
+        callbacks->comm_data.dom = dom;
+        callbacks->comm_data.dinfo = dinfo;
+        callbacks->comm_data.hvm = hvm;
+        callbacks->comm_data.pfn_type = pfn_type;
+        callbacks->comm_data.mmu = mmu;
+        callbacks->comm_data.p2m_frame_list = p2m_frame_list;
+        callbacks->comm_data.p2m = ctx->p2m;
+        comm_data = &callbacks->comm_data;
+
+        /* init callbacks->data */
+        if ( callbacks->init)
+        {
+            callbacks->data = NULL;
+            if (callbacks->init(&callbacks->comm_data, &callbacks->data) < 0 )
+            {
+                ERROR("Could not initialise restore callbacks private data");
+                goto out;
+            }
+        }
+        data = callbacks->data;
+    }
+
     xc_report_progress_start(xch, "Reloading memory pages", dinfo->p2m_size);
 
     /*
@@ -1298,7 +1347,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
             int brc;
 
             brc = apply_batch(xch, dom, ctx, region_mfn, pfn_type,
-                              pae_extended_cr3, hvm, mmu, &pagebuf, curbatch);
+                              pae_extended_cr3, hvm, mmu, &pagebuf, curbatch,
+                              callbacks);
             if ( brc < 0 )
                 goto out;
 
@@ -1368,6 +1418,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
         goto finish;
     }
 
+getpages:
     // DPRINTF("Buffered checkpoint\n");
 
     if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
@@ -1499,58 +1550,69 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
         }
     }
 
-    /*
-     * Pin page tables. Do this after writing to them as otherwise Xen
-     * will barf when doing the type-checking.
-     */
-    nr_pins = 0;
-    for ( i = 0; i < dinfo->p2m_size; i++ )
+    if ( callbacks && callbacks->flush_memory )
     {
-        if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
-            continue;
-
-        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        if ( callbacks->flush_memory(comm_data, data) < 0 )
         {
-        case XEN_DOMCTL_PFINFO_L1TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
-            break;
+            ERROR("Error doing callbacks->flush_memory()");
+            goto out;
+        }
+    }
+    else
+    {
+        /*
+         * Pin page tables. Do this after writing to them as otherwise Xen
+         * will barf when doing the type-checking.
+         */
+        nr_pins = 0;
+        for ( i = 0; i < dinfo->p2m_size; i++ )
+        {
+            if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+                continue;
 
-        case XEN_DOMCTL_PFINFO_L2TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
-            break;
+            switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+            {
+            case XEN_DOMCTL_PFINFO_L1TAB:
+                pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+                break;
 
-        case XEN_DOMCTL_PFINFO_L3TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
-            break;
+            case XEN_DOMCTL_PFINFO_L2TAB:
+                pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+                break;
 
-        case XEN_DOMCTL_PFINFO_L4TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
-            break;
+            case XEN_DOMCTL_PFINFO_L3TAB:
+                pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+                break;
 
-        default:
-            continue;
-        }
+            case XEN_DOMCTL_PFINFO_L4TAB:
+                pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+                break;
+
+            default:
+                continue;
+            }
 
-        pin[nr_pins].arg1.mfn = ctx->p2m[i];
-        nr_pins++;
+            pin[nr_pins].arg1.mfn = ctx->p2m[i];
+            nr_pins++;
 
-        /* Batch full? Then flush. */
-        if ( nr_pins == MAX_PIN_BATCH )
-        {
-            if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 )
+            /* Batch full? Then flush. */
+            if ( nr_pins == MAX_PIN_BATCH )
             {
-                PERROR("Failed to pin batch of %d page tables", nr_pins);
-                goto out;
+                if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 )
+                {
+                    PERROR("Failed to pin batch of %d page tables", nr_pins);
+                    goto out;
+                }
+                nr_pins = 0;
             }
-            nr_pins = 0;
         }
-    }
 
-    /* Flush final partial batch. */
-    if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) )
-    {
-        PERROR("Failed to pin batch of %d page tables", nr_pins);
-        goto out;
+        /* Flush final partial batch. */
+        if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) )
+        {
+            PERROR("Failed to pin batch of %d page tables", nr_pins);
+            goto out;
+        }
     }
 
     DPRINTF("Memory reloaded (%ld pages)\n", ctx->nr_pfns);
@@ -1767,37 +1829,61 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
     /* leave wallclock time. set by hypervisor */
     munmap(new_shared_info, PAGE_SIZE);
 
-    /* Uncanonicalise the pfn-to-mfn table frame-number list. */
-    for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+    if ( callbacks && callbacks->update_p2m )
     {
-        pfn = p2m_frame_list[i];
-        if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != 
XEN_DOMCTL_PFINFO_NOTAB) )
+        if ( callbacks->update_p2m(comm_data, data) < 0 )
         {
-            ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
+            ERROR("Error doing callbacks->update_p2m()");
             goto out;
         }
-        p2m_frame_list[i] = ctx->p2m[pfn];
     }
-
-    /* Copy the P2M we've constructed to the 'live' P2M */
-    if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE,
-                                           p2m_frame_list, P2M_FL_ENTRIES)) )
+    else
     {
-        PERROR("Couldn't map p2m table");
-        goto out;
+        /* Uncanonicalise the pfn-to-mfn table frame-number list. */
+        for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+        {
+            pfn = p2m_frame_list[i];
+            if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != 
XEN_DOMCTL_PFINFO_NOTAB) )
+            {
+                ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
+                goto out;
+            }
+            p2m_frame_list[i] = ctx->p2m[pfn];
+        }
+
+        /* Copy the P2M we've constructed to the 'live' P2M */
+        if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE,
+                                               p2m_frame_list, 
P2M_FL_ENTRIES)) )
+        {
+            PERROR("Couldn't map p2m table");
+            goto out;
+        }
+
+        /* If the domain we're restoring has a different word size to ours,
+         * we need to adjust the live_p2m assignment appropriately */
+        if ( dinfo->guest_width > sizeof (xen_pfn_t) )
+            for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
+                ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
+        else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
+            for ( i = 0; i < dinfo->p2m_size; i++ )
+                ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
+        else
+            memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * 
sizeof(xen_pfn_t));
+        munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
     }
 
-    /* If the domain we're restoring has a different word size to ours,
-     * we need to adjust the live_p2m assignment appropriately */
-    if ( dinfo->guest_width > sizeof (xen_pfn_t) )
-        for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
-            ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
-    else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
-        for ( i = 0; i < dinfo->p2m_size; i++ )   
-            ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
-    else
-        memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t));
-    munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
+    if ( callbacks && callbacks->finish_restotre )
+    {
+        rc = callbacks->finish_restotre(comm_data, data);
+        if ( rc == 1 )
+            goto getpages;
+
+        if ( rc < 0 )
+        {
+            ERROR("Er1ror doing callbacks->finish_restotre()");
+            goto out;
+        }
+    }
 
     DPRINTF("Domain ready to be built.\n");
     rc = 0;
@@ -1861,6 +1947,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
     rc = 0;
 
  out:
+    if ( callbacks && callbacks->free && callbacks->data)
+        callbacks->free(&callbacks->comm_data, callbacks->data);
     if ( (rc != 0) && (dom != 0) )
         xc_domain_destroy(xch, dom);
     xc_hypercall_buffer_free(xch, ctxt);
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index 9ed0ea4..709a284 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -60,6 +60,57 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t 
dom, uint32_t max_iter
                    struct save_callbacks* callbacks, int hvm);
 
 
+/* pass the variable defined in xc_domain_restore() to callback. Use
+ * this structure for the following purpose:
+ *   1. avoid too many arguments.
+ *   2. different callback implementations may need different arguments.
+ *      Just add the information you need here.
+ */
+struct restore_data
+{
+    xc_interface *xch;
+    uint32_t dom;
+    struct domain_info_context *dinfo;
+    int hvm;
+    unsigned long *pfn_type;
+    struct xc_mmu *mmu;
+    unsigned long *p2m_frame_list;
+    unsigned long *p2m;
+};
+
+/* callbacks provided by xc_domain_restore */
+struct restore_callbacks {
+    /* callback to init data */
+    int (*init)(struct restore_data *comm_data, void **data);
+    /* callback to free data */
+    void (*free)(struct restore_data *comm_data, void *data);
+    /* callback to get a buffer to store memory data that is transferred
+     * from the source machine.
+     */
+    char *(*get_page)(struct restore_data *comm_data, void *data,
+                     unsigned long pfn);
+    /* callback to flush memory that is transferred from the source machine
+     * to the guest. Update the guest's pagetable if necessary.
+     */
+    int (*flush_memory)(struct restore_data *comm_data, void *data);
+    /* callback to update the guest's p2m table */
+    int (*update_p2m)(struct restore_data *comm_data, void *data);
+    /* callback to finish restore process. It is called before 
xc_domain_restore()
+     * returns.
+     *
+     * Return value:
+     *   -1: error
+     *    0: continue to start vm
+     *    1: continue to do a checkpoint
+     */
+    int (*finish_restotre)(struct restore_data *comm_data, void *data);
+
+    /* xc_domain_restore() init it */
+    struct restore_data comm_data;
+    /* to be provided as the last argument to each callback function */
+    void* data;
+};
+
 /**
  * This function will restore a saved domain.
  *
@@ -76,7 +127,8 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t 
dom, uint32_t max_iter
 int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn,
-                      unsigned int hvm, unsigned int pae, int superpages);
+                      unsigned int hvm, unsigned int pae, int superpages,
+                      struct restore_callbacks *callbacks);
 /**
  * xc_domain_restore writes a file to disk that contains the device
  * model saved state.
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index c702cf7..32cdd03 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -305,7 +305,7 @@ int libxl__domain_restore_common(libxl_ctx *ctx, uint32_t 
domid,
     rc = xc_domain_restore(ctx->xch, fd, domid,
                              state->store_port, &state->store_mfn,
                              state->console_port, &state->console_mfn,
-                             info->hvm, info->u.hvm.pae, 0);
+                             info->hvm, info->u.hvm.pae, 0, NULL);
     if ( rc ) {
         LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "restoring domain");
         return ERROR_FAIL;
diff --git a/tools/xcutils/xc_restore.c b/tools/xcutils/xc_restore.c
index ea069ac..8af88e4 100644
--- a/tools/xcutils/xc_restore.c
+++ b/tools/xcutils/xc_restore.c
@@ -46,7 +46,8 @@ main(int argc, char **argv)
            superpages = 0;
 
     ret = xc_domain_restore(xch, io_fd, domid, store_evtchn, &store_mfn,
-                            console_evtchn, &console_mfn, hvm, pae, 
superpages);
+                            console_evtchn, &console_mfn, hvm, pae, superpages,
+                            NULL);
 
     if ( ret == 0 )
     {
-- 
1.8.0


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.