|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH 2/7] xc_domain_restore: introduce restore_callbacks for colo
In colo mode, the SVM also runs, so we should update xc_restore to support it.
The first step is to add some callbacks for colo.
We add the following callbacks:
1. init(): init the private data used for colo
2. free(): free the resource we allocate and store in the private data
3. get_page(): the SVM is running, so we can't update the memory in apply_batch().
This callback returns a page buffer, and apply_batch() copies
the page to this buffer. The content of this buffer should be the current
content of this page, so that we can use it for verification.
4. flush_memory(): update the SVM memory and pagetable.
5. update_p2m(): update the SVM p2m page.
6. finish_restore(): wait for a new checkpoint.
We also add a new structure, restore_data, to avoid passing too many arguments
to these callbacks. This structure stores the variables used in
xc_domain_restore(), and these variables will be used in the callbacks.
Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx>
Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx>
---
tools/libxc/ia64/xc_ia64_linux_restore.c | 3 +-
tools/libxc/xc_domain_restore.c | 256 +++++++++++++++++++++----------
tools/libxc/xenguest.h | 54 ++++++-
tools/libxl/libxl_dom.c | 2 +-
tools/xcutils/xc_restore.c | 3 +-
5 files changed, 230 insertions(+), 88 deletions(-)
diff --git a/tools/libxc/ia64/xc_ia64_linux_restore.c
b/tools/libxc/ia64/xc_ia64_linux_restore.c
index b4e9e9c..ca76be6 100644
--- a/tools/libxc/ia64/xc_ia64_linux_restore.c
+++ b/tools/libxc/ia64/xc_ia64_linux_restore.c
@@ -550,7 +550,8 @@ int
xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
unsigned int store_evtchn, unsigned long *store_mfn,
unsigned int console_evtchn, unsigned long *console_mfn,
- unsigned int hvm, unsigned int pae, int superpages)
+ unsigned int hvm, unsigned int pae, int superpages,
+ struct restore_callbacks *callbacks)
{
DECLARE_DOMCTL;
int rc = 1;
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 43e6c52..fa828e9 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -882,13 +882,15 @@ static int pagebuf_get(xc_interface *xch, struct
restore_ctx *ctx,
static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx
*ctx,
xen_pfn_t* region_mfn, unsigned long* pfn_type, int
pae_extended_cr3,
unsigned int hvm, struct xc_mmu* mmu,
- pagebuf_t* pagebuf, int curbatch)
+ pagebuf_t* pagebuf, int curbatch,
+ struct restore_callbacks *callbacks)
{
int i, j, curpage, nr_mfns;
/* used by debug verify code */
unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
/* Our mapping of the current region (batch) */
char *region_base;
+ char *target_buf;
/* A temporary mapping, and a copy, of one frame of guest memory. */
unsigned long *page = NULL;
int nraces = 0;
@@ -954,16 +956,19 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
}
}
- /* Map relevant mfns */
- pfn_err = calloc(j, sizeof(*pfn_err));
- region_base = xc_map_foreign_bulk(
- xch, dom, PROT_WRITE, region_mfn, pfn_err, j);
-
- if ( region_base == NULL )
+ if ( !callbacks || !callbacks->get_page)
{
- PERROR("map batch failed");
- free(pfn_err);
- return -1;
+ /* Map relevant mfns */
+ pfn_err = calloc(j, sizeof(*pfn_err));
+ region_base = xc_map_foreign_bulk(
+ xch, dom, PROT_WRITE, region_mfn, pfn_err, j);
+
+ if ( region_base == NULL )
+ {
+ PERROR("map batch failed");
+ free(pfn_err);
+ return -1;
+ }
}
for ( i = 0, curpage = -1; i < j; i++ )
@@ -975,7 +980,7 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
/* a bogus/unmapped page: skip it */
continue;
- if (pfn_err[i])
+ if ( (!callbacks || !callbacks->get_page) && pfn_err[i] )
{
ERROR("unexpected PFN mapping failure");
goto err_mapped;
@@ -993,8 +998,20 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
mfn = ctx->p2m[pfn];
+ if ( callbacks && callbacks->get_page )
+ {
+ target_buf = callbacks->get_page(&callbacks->comm_data,
+ callbacks->data, pfn);
+ if ( !target_buf )
+ {
+ ERROR("Cannot get a buffer to store memory");
+ goto err_mapped;
+ }
+ }
+ else
+ target_buf = region_base + i*PAGE_SIZE;
/* In verify mode, we use a copy; otherwise we work in place */
- page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
+ page = pagebuf->verify ? (void *)buf : target_buf;
memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE,
PAGE_SIZE);
@@ -1038,27 +1055,26 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
if ( pagebuf->verify )
{
- int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
+ int res = memcmp(buf, target_buf, PAGE_SIZE);
if ( res )
{
int v;
DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
"actualcs=%08lx\n", pfn, pagebuf->pfn_types[pfn],
- csum_page(region_base + (i + curbatch)*PAGE_SIZE),
+ csum_page(target_buf),
csum_page(buf));
for ( v = 0; v < 4; v++ )
{
- unsigned long *p = (unsigned long *)
- (region_base + i*PAGE_SIZE);
+ unsigned long *p = (unsigned long *)target_buf;
if ( buf[v] != p[v] )
DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]);
}
}
}
- if ( !hvm &&
+ if ( (!callbacks || !callbacks->get_page) && !hvm &&
xc_add_mmu_update(xch, mmu,
(((unsigned long long)mfn) << PAGE_SHIFT)
| MMU_MACHPHYS_UPDATE, pfn) )
@@ -1071,8 +1087,11 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
rc = nraces;
err_mapped:
- munmap(region_base, j*PAGE_SIZE);
- free(pfn_err);
+ if ( !callbacks || !callbacks->get_page )
+ {
+ munmap(region_base, j*PAGE_SIZE);
+ free(pfn_err);
+ }
return rc;
}
@@ -1080,7 +1099,8 @@ static int apply_batch(xc_interface *xch, uint32_t dom,
struct restore_ctx *ctx,
int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
unsigned int store_evtchn, unsigned long *store_mfn,
unsigned int console_evtchn, unsigned long *console_mfn,
- unsigned int hvm, unsigned int pae, int superpages)
+ unsigned int hvm, unsigned int pae, int superpages,
+ struct restore_callbacks *callbacks)
{
DECLARE_DOMCTL;
int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0;
@@ -1141,6 +1161,9 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
static struct restore_ctx *ctx = &_ctx;
struct domain_info_context *dinfo = &ctx->dinfo;
+ struct restore_data *comm_data = NULL;
+ void *data = NULL;
+
pagebuf_init(&pagebuf);
memset(&tailbuf, 0, sizeof(tailbuf));
tailbuf.ishvm = hvm;
@@ -1249,6 +1272,32 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
goto out;
}
+ /* init callbacks->comm_data */
+ if ( callbacks )
+ {
+ callbacks->comm_data.xch = xch;
+ callbacks->comm_data.dom = dom;
+ callbacks->comm_data.dinfo = dinfo;
+ callbacks->comm_data.hvm = hvm;
+ callbacks->comm_data.pfn_type = pfn_type;
+ callbacks->comm_data.mmu = mmu;
+ callbacks->comm_data.p2m_frame_list = p2m_frame_list;
+ callbacks->comm_data.p2m = ctx->p2m;
+ comm_data = &callbacks->comm_data;
+
+ /* init callbacks->data */
+ if ( callbacks->init)
+ {
+ callbacks->data = NULL;
+ if (callbacks->init(&callbacks->comm_data, &callbacks->data) < 0 )
+ {
+ ERROR("Could not initialise restore callbacks private data");
+ goto out;
+ }
+ }
+ data = callbacks->data;
+ }
+
xc_report_progress_start(xch, "Reloading memory pages", dinfo->p2m_size);
/*
@@ -1298,7 +1347,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
int brc;
brc = apply_batch(xch, dom, ctx, region_mfn, pfn_type,
- pae_extended_cr3, hvm, mmu, &pagebuf, curbatch);
+ pae_extended_cr3, hvm, mmu, &pagebuf, curbatch,
+ callbacks);
if ( brc < 0 )
goto out;
@@ -1368,6 +1418,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
goto finish;
}
+getpages:
// DPRINTF("Buffered checkpoint\n");
if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
@@ -1499,58 +1550,69 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
}
}
- /*
- * Pin page tables. Do this after writing to them as otherwise Xen
- * will barf when doing the type-checking.
- */
- nr_pins = 0;
- for ( i = 0; i < dinfo->p2m_size; i++ )
+ if ( callbacks && callbacks->flush_memory )
{
- if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
- continue;
-
- switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ if ( callbacks->flush_memory(comm_data, data) < 0 )
{
- case XEN_DOMCTL_PFINFO_L1TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
- break;
+ ERROR("Error doing callbacks->flush_memory()");
+ goto out;
+ }
+ }
+ else
+ {
+ /*
+ * Pin page tables. Do this after writing to them as otherwise Xen
+ * will barf when doing the type-checking.
+ */
+ nr_pins = 0;
+ for ( i = 0; i < dinfo->p2m_size; i++ )
+ {
+ if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+ continue;
- case XEN_DOMCTL_PFINFO_L2TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
- break;
+ switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+ break;
- case XEN_DOMCTL_PFINFO_L3TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
- break;
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+ break;
- case XEN_DOMCTL_PFINFO_L4TAB:
- pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
- break;
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+ break;
- default:
- continue;
- }
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+ break;
+
+ default:
+ continue;
+ }
- pin[nr_pins].arg1.mfn = ctx->p2m[i];
- nr_pins++;
+ pin[nr_pins].arg1.mfn = ctx->p2m[i];
+ nr_pins++;
- /* Batch full? Then flush. */
- if ( nr_pins == MAX_PIN_BATCH )
- {
- if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 )
+ /* Batch full? Then flush. */
+ if ( nr_pins == MAX_PIN_BATCH )
{
- PERROR("Failed to pin batch of %d page tables", nr_pins);
- goto out;
+ if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 )
+ {
+ PERROR("Failed to pin batch of %d page tables", nr_pins);
+ goto out;
+ }
+ nr_pins = 0;
}
- nr_pins = 0;
}
- }
- /* Flush final partial batch. */
- if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) )
- {
- PERROR("Failed to pin batch of %d page tables", nr_pins);
- goto out;
+ /* Flush final partial batch. */
+ if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) )
+ {
+ PERROR("Failed to pin batch of %d page tables", nr_pins);
+ goto out;
+ }
}
DPRINTF("Memory reloaded (%ld pages)\n", ctx->nr_pfns);
@@ -1767,37 +1829,61 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
/* leave wallclock time. set by hypervisor */
munmap(new_shared_info, PAGE_SIZE);
- /* Uncanonicalise the pfn-to-mfn table frame-number list. */
- for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+ if ( callbacks && callbacks->update_p2m )
{
- pfn = p2m_frame_list[i];
- if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] !=
XEN_DOMCTL_PFINFO_NOTAB) )
+ if ( callbacks->update_p2m(comm_data, data) < 0 )
{
- ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
+ ERROR("Error doing callbacks->update_p2m()");
goto out;
}
- p2m_frame_list[i] = ctx->p2m[pfn];
}
-
- /* Copy the P2M we've constructed to the 'live' P2M */
- if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE,
- p2m_frame_list, P2M_FL_ENTRIES)) )
+ else
{
- PERROR("Couldn't map p2m table");
- goto out;
+ /* Uncanonicalise the pfn-to-mfn table frame-number list. */
+ for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+ {
+ pfn = p2m_frame_list[i];
+ if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] !=
XEN_DOMCTL_PFINFO_NOTAB) )
+ {
+ ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
+ goto out;
+ }
+ p2m_frame_list[i] = ctx->p2m[pfn];
+ }
+
+ /* Copy the P2M we've constructed to the 'live' P2M */
+ if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE,
+ p2m_frame_list,
P2M_FL_ENTRIES)) )
+ {
+ PERROR("Couldn't map p2m table");
+ goto out;
+ }
+
+ /* If the domain we're restoring has a different word size to ours,
+ * we need to adjust the live_p2m assignment appropriately */
+ if ( dinfo->guest_width > sizeof (xen_pfn_t) )
+ for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
+ ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
+ else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
+ for ( i = 0; i < dinfo->p2m_size; i++ )
+ ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
+ else
+ memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size *
sizeof(xen_pfn_t));
+ munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
}
- /* If the domain we're restoring has a different word size to ours,
- * we need to adjust the live_p2m assignment appropriately */
- if ( dinfo->guest_width > sizeof (xen_pfn_t) )
- for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
- ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
- else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
- for ( i = 0; i < dinfo->p2m_size; i++ )
- ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
- else
- memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t));
- munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
+ if ( callbacks && callbacks->finish_restotre )
+ {
+ rc = callbacks->finish_restotre(comm_data, data);
+ if ( rc == 1 )
+ goto getpages;
+
+ if ( rc < 0 )
+ {
+ ERROR("Error doing callbacks->finish_restotre()");
+ goto out;
+ }
+ }
DPRINTF("Domain ready to be built.\n");
rc = 0;
@@ -1861,6 +1947,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd,
uint32_t dom,
rc = 0;
out:
+ if ( callbacks && callbacks->free && callbacks->data)
+ callbacks->free(&callbacks->comm_data, callbacks->data);
if ( (rc != 0) && (dom != 0) )
xc_domain_destroy(xch, dom);
xc_hypercall_buffer_free(xch, ctxt);
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index 9ed0ea4..709a284 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -60,6 +60,57 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t
dom, uint32_t max_iter
struct save_callbacks* callbacks, int hvm);
+/* Pass the variables defined in xc_domain_restore() to the callbacks. Use
+ * this structure for the following purposes:
+ * 1. avoid too many arguments.
+ * 2. different callback implementations may need different arguments.
+ * Just add the information you need here.
+ */
+struct restore_data
+{
+ xc_interface *xch;
+ uint32_t dom;
+ struct domain_info_context *dinfo;
+ int hvm;
+ unsigned long *pfn_type;
+ struct xc_mmu *mmu;
+ unsigned long *p2m_frame_list;
+ unsigned long *p2m;
+};
+
+/* callbacks provided by xc_domain_restore */
+struct restore_callbacks {
+ /* callback to init data */
+ int (*init)(struct restore_data *comm_data, void **data);
+ /* callback to free data */
+ void (*free)(struct restore_data *comm_data, void *data);
+ /* callback to get a buffer to store memory data that is transferred
+ * from the source machine.
+ */
+ char *(*get_page)(struct restore_data *comm_data, void *data,
+ unsigned long pfn);
+ /* callback to flush memory that is transferred from the source machine
+ * to the guest. Update the guest's pagetable if necessary.
+ */
+ int (*flush_memory)(struct restore_data *comm_data, void *data);
+ /* callback to update the guest's p2m table */
+ int (*update_p2m)(struct restore_data *comm_data, void *data);
+ /* callback to finish restore process. It is called before
+ * xc_domain_restore() returns.
+ *
+ * Return value:
+ * -1: error
+ * 0: continue to start vm
+ * 1: continue to do a checkpoint
+ */
+ int (*finish_restotre)(struct restore_data *comm_data, void *data);
+
+ /* xc_domain_restore() init it */
+ struct restore_data comm_data;
+ /* to be provided as the last argument to each callback function */
+ void* data;
+};
+
/**
* This function will restore a saved domain.
*
@@ -76,7 +127,8 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t
dom, uint32_t max_iter
int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
unsigned int store_evtchn, unsigned long *store_mfn,
unsigned int console_evtchn, unsigned long *console_mfn,
- unsigned int hvm, unsigned int pae, int superpages);
+ unsigned int hvm, unsigned int pae, int superpages,
+ struct restore_callbacks *callbacks);
/**
* xc_domain_restore writes a file to disk that contains the device
* model saved state.
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index c702cf7..32cdd03 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -305,7 +305,7 @@ int libxl__domain_restore_common(libxl_ctx *ctx, uint32_t
domid,
rc = xc_domain_restore(ctx->xch, fd, domid,
state->store_port, &state->store_mfn,
state->console_port, &state->console_mfn,
- info->hvm, info->u.hvm.pae, 0);
+ info->hvm, info->u.hvm.pae, 0, NULL);
if ( rc ) {
LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "restoring domain");
return ERROR_FAIL;
diff --git a/tools/xcutils/xc_restore.c b/tools/xcutils/xc_restore.c
index ea069ac..8af88e4 100644
--- a/tools/xcutils/xc_restore.c
+++ b/tools/xcutils/xc_restore.c
@@ -46,7 +46,8 @@ main(int argc, char **argv)
superpages = 0;
ret = xc_domain_restore(xch, io_fd, domid, store_evtchn, &store_mfn,
- console_evtchn, &console_mfn, hvm, pae,
superpages);
+ console_evtchn, &console_mfn, hvm, pae, superpages,
+ NULL);
if ( ret == 0 )
{
--
1.8.0
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |