[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC Patch v2 05/16] xc_domain_restore: introduce restore_callbacks for colo



In colo mode the SVM is also running, so we should update xc_restore to
support it. The first step is to add some callbacks for colo.

We add the following callbacks:
1. init(): init the private data used for colo
2. free(): free the resources we allocated and stored in the private data
3. get_page(): SVM is running, so we can't update the memory in apply_batch().
   This callback returns a page buffer, and apply_batch() copies the page
   into this buffer. The buffer should contain the current content of this
   page, so that it can be used for verification.
4. flush_memory(): update the SVM memory and pagetable.
5. update_p2m(): update the SVM p2m page.
6. finish_restore(): wait for a new checkpoint.

We also add a new structure restore_data to avoid passing too many arguments
to these callbacks. This structure stores the variables used in
xc_domain_restore(), and these variables will be used in the callbacks.

Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx>
Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
 tools/libxc/xc_domain_restore.c |  264 ++++++++++++++++++++++++++-------------
 tools/libxc/xenguest.h          |   48 +++++++
 2 files changed, 225 insertions(+), 87 deletions(-)

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 63d36cd..aac2de0 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -1076,7 +1076,8 @@ static int pagebuf_get(xc_interface *xch, struct 
restore_ctx *ctx,
 static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx 
*ctx,
                        xen_pfn_t* region_mfn, unsigned long* pfn_type, int 
pae_extended_cr3,
                        struct xc_mmu* mmu,
-                       pagebuf_t* pagebuf, int curbatch)
+                       pagebuf_t* pagebuf, int curbatch,
+                       struct restore_callbacks *callbacks)
 {
     int i, j, curpage, nr_mfns;
     int k, scount;
@@ -1085,6 +1086,7 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
     unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
     /* Our mapping of the current region (batch) */
     char *region_base;
+    char *target_buf;
     /* A temporary mapping, and a copy, of one frame of guest memory. */
     unsigned long *page = NULL;
     int nraces = 0;
@@ -1241,21 +1243,24 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
             region_mfn[i] = ctx->hvm ? pfn : ctx->p2m[pfn];
     }
 
-    /* Map relevant mfns */
-    pfn_err = calloc(j, sizeof(*pfn_err));
-    if ( pfn_err == NULL )
+    if ( !callbacks || !callbacks->get_page)
     {
-        PERROR("allocation for pfn_err failed");
-        return -1;
-    }
-    region_base = xc_map_foreign_bulk(
-        xch, dom, PROT_WRITE, region_mfn, pfn_err, j);
+        /* Map relevant mfns */
+        pfn_err = calloc(j, sizeof(*pfn_err));
+        if ( pfn_err == NULL )
+        {
+            PERROR("allocation for pfn_err failed");
+            return -1;
+        }
+        region_base = xc_map_foreign_bulk(
+            xch, dom, PROT_WRITE, region_mfn, pfn_err, j);
 
-    if ( region_base == NULL )
-    {
-        PERROR("map batch failed");
-        free(pfn_err);
-        return -1;
+        if ( region_base == NULL )
+        {
+            PERROR("map batch failed");
+            free(pfn_err);
+            return -1;
+        }
     }
 
     for ( i = 0, curpage = -1; i < j; i++ )
@@ -1279,7 +1284,7 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
             continue;
         }
 
-        if (pfn_err[i])
+        if ( (!callbacks || !callbacks->get_page) && pfn_err[i] )
         {
             ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn 
%lx",
                   pfn, region_mfn[i], ctx->p2m[pfn]);
@@ -1298,8 +1303,20 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
 
         mfn = ctx->p2m[pfn];
 
+        if ( callbacks && callbacks->get_page )
+        {
+            target_buf = callbacks->get_page(&callbacks->comm_data,
+                                             callbacks->data, pfn);
+            if ( !target_buf )
+            {
+                ERROR("Cannot get a buffer to store memory");
+                goto err_mapped;
+            }
+        }
+        else
+            target_buf = region_base + i*PAGE_SIZE;
         /* In verify mode, we use a copy; otherwise we work in place */
-        page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
+        page = pagebuf->verify ? (void *)buf : target_buf;
 
         /* Remus - page decompression */
         if (pagebuf->compressing)
@@ -1357,27 +1374,26 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
 
         if ( pagebuf->verify )
         {
-            int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
+            int res = memcmp(buf, target_buf, PAGE_SIZE);
             if ( res )
             {
                 int v;
 
                 DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
                         "actualcs=%08lx\n", pfn, pagebuf->pfn_types[pfn],
-                        csum_page(region_base + (i + curbatch)*PAGE_SIZE),
+                        csum_page(target_buf),
                         csum_page(buf));
 
                 for ( v = 0; v < 4; v++ )
                 {
-                    unsigned long *p = (unsigned long *)
-                        (region_base + i*PAGE_SIZE);
+                    unsigned long *p = (unsigned long *)target_buf;
                     if ( buf[v] != p[v] )
                         DPRINTF("    %d: %08lx %08lx\n", v, buf[v], p[v]);
                 }
             }
         }
 
-        if ( !ctx->hvm &&
+        if ( (!callbacks || !callbacks->get_page) && !ctx->hvm &&
              xc_add_mmu_update(xch, mmu,
                                (((unsigned long long)mfn) << PAGE_SHIFT)
                                | MMU_MACHPHYS_UPDATE, pfn) )
@@ -1390,8 +1406,11 @@ static int apply_batch(xc_interface *xch, uint32_t dom, 
struct restore_ctx *ctx,
     rc = nraces;
 
   err_mapped:
-    munmap(region_base, j*PAGE_SIZE);
-    free(pfn_err);
+    if ( !callbacks || !callbacks->get_page )
+    {
+        munmap(region_base, j*PAGE_SIZE);
+        free(pfn_err);
+    }
 
     return rc;
 }
@@ -1461,6 +1480,9 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
     struct restore_ctx *ctx = &_ctx;
     struct domain_info_context *dinfo = &ctx->dinfo;
 
+    struct restore_data *comm_data = NULL;
+    void *data = NULL;
+
     DPRINTF("%s: starting restore of new domid %u", __func__, dom);
 
     pagebuf_init(&pagebuf);
@@ -1582,6 +1604,33 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
         goto out;
     }
 
+    /* init callbacks->comm_data */
+    if ( callbacks )
+    {
+        callbacks->comm_data.xch = xch;
+        callbacks->comm_data.dom = dom;
+        callbacks->comm_data.dinfo = dinfo;
+        callbacks->comm_data.io_fd = io_fd;
+        callbacks->comm_data.hvm = hvm;
+        callbacks->comm_data.pfn_type = pfn_type;
+        callbacks->comm_data.mmu = mmu;
+        callbacks->comm_data.p2m_frame_list = p2m_frame_list;
+        callbacks->comm_data.p2m = ctx->p2m;
+        comm_data = &callbacks->comm_data;
+
+        /* init callbacks->data */
+        if ( callbacks->init)
+        {
+            callbacks->data = NULL;
+            if (callbacks->init(&callbacks->comm_data, &callbacks->data) < 0 )
+            {
+                ERROR("Could not initialise restore callbacks private data");
+                goto out;
+            }
+        }
+        data = callbacks->data;
+    }
+
     xc_report_progress_start(xch, "Reloading memory pages", dinfo->p2m_size);
 
     /*
@@ -1676,7 +1725,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
             int brc;
 
             brc = apply_batch(xch, dom, ctx, region_mfn, pfn_type,
-                              pae_extended_cr3, mmu, &pagebuf, curbatch);
+                              pae_extended_cr3, mmu, &pagebuf, curbatch,
+                              callbacks);
             if ( brc < 0 )
                 goto out;
 
@@ -1761,6 +1811,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
         goto finish;
     }
 
+getpages:
     // DPRINTF("Buffered checkpoint\n");
 
     if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
@@ -1902,58 +1953,69 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
         }
     }
 
-    /*
-     * Pin page tables. Do this after writing to them as otherwise Xen
-     * will barf when doing the type-checking.
-     */
-    nr_pins = 0;
-    for ( i = 0; i < dinfo->p2m_size; i++ )
+    if ( callbacks && callbacks->flush_memory )
     {
-        if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
-            continue;
-
-        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        if ( callbacks->flush_memory(comm_data, data) < 0 )
         {
-        case XEN_DOMCTL_PFINFO_L1TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
-            break;
+            ERROR("Error doing callbacks->flush_memory()");
+            goto out;
+        }
+    }
+    else
+    {
+        /*
+         * Pin page tables. Do this after writing to them as otherwise Xen
+         * will barf when doing the type-checking.
+         */
+        nr_pins = 0;
+        for ( i = 0; i < dinfo->p2m_size; i++ )
+        {
+            if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+                continue;
 
-        case XEN_DOMCTL_PFINFO_L2TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
-            break;
+            switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+            {
+            case XEN_DOMCTL_PFINFO_L1TAB:
+                pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+                break;
 
-        case XEN_DOMCTL_PFINFO_L3TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
-            break;
+            case XEN_DOMCTL_PFINFO_L2TAB:
+                pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+                break;
 
-        case XEN_DOMCTL_PFINFO_L4TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
-            break;
+            case XEN_DOMCTL_PFINFO_L3TAB:
+                pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+                break;
 
-        default:
-            continue;
-        }
+            case XEN_DOMCTL_PFINFO_L4TAB:
+                pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+                break;
 
-        pin[nr_pins].arg1.mfn = ctx->p2m[i];
-        nr_pins++;
+            default:
+                continue;
+            }
 
-        /* Batch full? Then flush. */
-        if ( nr_pins == MAX_PIN_BATCH )
-        {
-            if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 )
+            pin[nr_pins].arg1.mfn = ctx->p2m[i];
+            nr_pins++;
+
+            /* Batch full? Then flush. */
+            if ( nr_pins == MAX_PIN_BATCH )
             {
-                PERROR("Failed to pin batch of %d page tables", nr_pins);
-                goto out;
+                if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 )
+                {
+                    PERROR("Failed to pin batch of %d page tables", nr_pins);
+                    goto out;
+                }
+                nr_pins = 0;
             }
-            nr_pins = 0;
         }
-    }
 
-    /* Flush final partial batch. */
-    if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) )
-    {
-        PERROR("Failed to pin batch of %d page tables", nr_pins);
-        goto out;
+        /* Flush final partial batch. */
+        if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) )
+        {
+            PERROR("Failed to pin batch of %d page tables", nr_pins);
+            goto out;
+        }
     }
 
     DPRINTF("Memory reloaded (%ld pages)\n", ctx->nr_pfns);
@@ -2052,6 +2114,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
             *console_mfn = ctx->p2m[GET_FIELD(start_info, console.domU.mfn)];
             SET_FIELD(start_info, console.domU.mfn, *console_mfn);
             SET_FIELD(start_info, console.domU.evtchn, console_evtchn);
+            callbacks->comm_data.store_mfn = *store_mfn;
+            callbacks->comm_data.console_mfn = *console_mfn;
             munmap(start_info, PAGE_SIZE);
         }
         /* Uncanonicalise each GDT frame number. */
@@ -2199,37 +2263,61 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
     /* leave wallclock time. set by hypervisor */
     munmap(new_shared_info, PAGE_SIZE);
 
-    /* Uncanonicalise the pfn-to-mfn table frame-number list. */
-    for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+    if ( callbacks && callbacks->update_p2m )
     {
-        pfn = p2m_frame_list[i];
-        if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != 
XEN_DOMCTL_PFINFO_NOTAB) )
+        if ( callbacks->update_p2m(comm_data, data) < 0 )
         {
-            ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
+            ERROR("Error doing callbacks->update_p2m()");
             goto out;
         }
-        p2m_frame_list[i] = ctx->p2m[pfn];
     }
-
-    /* Copy the P2M we've constructed to the 'live' P2M */
-    if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE,
-                                           p2m_frame_list, P2M_FL_ENTRIES)) )
+    else
     {
-        PERROR("Couldn't map p2m table");
-        goto out;
+        /* Uncanonicalise the pfn-to-mfn table frame-number list. */
+        for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+        {
+            pfn = p2m_frame_list[i];
+            if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != 
XEN_DOMCTL_PFINFO_NOTAB) )
+            {
+                ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
+                goto out;
+            }
+            p2m_frame_list[i] = ctx->p2m[pfn];
+        }
+
+        /* Copy the P2M we've constructed to the 'live' P2M */
+        if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE,
+                                               p2m_frame_list, 
P2M_FL_ENTRIES)) )
+        {
+            PERROR("Couldn't map p2m table");
+            goto out;
+        }
+
+        /* If the domain we're restoring has a different word size to ours,
+         * we need to adjust the live_p2m assignment appropriately */
+        if ( dinfo->guest_width > sizeof (xen_pfn_t) )
+            for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
+                ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
+        else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
+            for ( i = 0; i < dinfo->p2m_size; i++ )
+                ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
+        else
+            memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * 
sizeof(xen_pfn_t));
+        munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
     }
 
-    /* If the domain we're restoring has a different word size to ours,
-     * we need to adjust the live_p2m assignment appropriately */
-    if ( dinfo->guest_width > sizeof (xen_pfn_t) )
-        for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
-            ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
-    else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
-        for ( i = 0; i < dinfo->p2m_size; i++ )   
-            ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
-    else
-        memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t));
-    munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
+    if ( callbacks && callbacks->finish_restotre )
+    {
+        rc = callbacks->finish_restotre(comm_data, data);
+        if ( rc == 1 )
+            goto getpages;
+
+        if ( rc < 0 )
+        {
+            ERROR("Er1ror doing callbacks->finish_restotre()");
+            goto out;
+        }
+    }
 
     rc = xc_dom_gnttab_seed(xch, dom, *console_mfn, *store_mfn,
                             console_domid, store_domid);
@@ -2329,6 +2417,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, 
uint32_t dom,
     rc = 0;
 
  out:
+    if ( callbacks && callbacks->free && callbacks->data)
+        callbacks->free(&callbacks->comm_data, callbacks->data);
     if ( (rc != 0) && (dom != 0) )
         xc_domain_destroy(xch, dom);
     xc_hypercall_buffer_free(xch, ctxt);
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index 4714bd2..4bb444a 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -90,12 +90,60 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t 
dom, uint32_t max_iter
                    unsigned long vm_generationid_addr);
 
 
+/* pass the variable defined in xc_domain_restore() to callback. Use
+ * this structure for the following purpose:
+ *   1. avoid too many arguments.
+ *   2. different callback implemention may need different arguments.
+ *      Just add the information you need here.
+ */
+struct restore_data
+{
+    xc_interface *xch;
+    uint32_t dom;
+    struct domain_info_context *dinfo;
+    int io_fd;
+    int hvm;
+    unsigned long *pfn_type;
+    struct xc_mmu *mmu;
+    unsigned long *p2m_frame_list;
+    unsigned long *p2m;
+    unsigned long console_mfn;
+    unsigned long store_mfn;
+};
+
 /* callbacks provided by xc_domain_restore */
 struct restore_callbacks {
+    /* callback to init data */
+    int (*init)(struct restore_data *comm_data, void **data);
+    /* callback to free data */
+    void (*free)(struct restore_data *comm_data, void *data);
+    /* callback to get a buffer to store memory data that is transfered
+     * from the source machine.
+     */
+    char *(*get_page)(struct restore_data *comm_data, void *data,
+                     unsigned long pfn);
+    /* callback to flush memory that is transfered from the source machine
+     * to the guest. Update the guest's pagetable if necessary.
+     */
+    int (*flush_memory)(struct restore_data *comm_data, void *data);
+    /* callback to update the guest's p2m table */
+    int (*update_p2m)(struct restore_data *comm_data, void *data);
+    /* callback to finish restore process. It is called before 
xc_domain_restore()
+     * returns.
+     *
+     * Return value:
+     *   -1: error
+     *    0: continue to start vm
+     *    1: continue to do a checkpoint
+     */
+    int (*finish_restotre)(struct restore_data *comm_data, void *data);
     /* callback to restore toolstack specific data */
     int (*toolstack_restore)(uint32_t domid, const uint8_t *buf,
             uint32_t size, void* data);
 
+    /* xc_domain_restore() init it */
+    struct restore_data comm_data;
+
     /* to be provided as the last argument to each callback function */
     void* data;
 };
-- 
1.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.