[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC Patch v2 05/16] xc_domain_restore: introduce restore_callbacks for colo
In colo mode, SVM also runs. So we should update xc_restore to support it. The first step is: add some callbacks for colo. We add the following callbacks: 1. init(): init the private data used for colo 2. free(): free the resources we allocated and stored in the private data 3. get_page(): SVM runs, so we can't update the memory in apply_batch(). This callback will return a page buffer, and apply_batch() will copy the page to this buffer. The content of this buffer should be the current content of this page, so we can use it for verification. 4. flush_memory(): update the SVM memory and pagetable. 5. update_p2m(): update the SVM p2m page. 6. finish_restore(): wait for a new checkpoint. We also add a new structure restore_data to avoid passing too many arguments to these callbacks. This structure stores the variables used in xc_domain_restore(), and these variables will be used in the callbacks. Signed-off-by: Ye Wei <wei.ye1987@xxxxxxxxx> Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx> Signed-off-by: Wen Congyang <wency@xxxxxxxxxxxxxx> --- tools/libxc/xc_domain_restore.c | 264 ++++++++++++++++++++++++++------------- tools/libxc/xenguest.h | 48 +++++++ 2 files changed, 225 insertions(+), 87 deletions(-) diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c index 63d36cd..aac2de0 100644 --- a/tools/libxc/xc_domain_restore.c +++ b/tools/libxc/xc_domain_restore.c @@ -1076,7 +1076,8 @@ static int pagebuf_get(xc_interface *xch, struct restore_ctx *ctx, static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, xen_pfn_t* region_mfn, unsigned long* pfn_type, int pae_extended_cr3, struct xc_mmu* mmu, - pagebuf_t* pagebuf, int curbatch) + pagebuf_t* pagebuf, int curbatch, + struct restore_callbacks *callbacks) { int i, j, curpage, nr_mfns; int k, scount; @@ -1085,6 +1086,7 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; /* Our mapping of the current 
region (batch) */ char *region_base; + char *target_buf; /* A temporary mapping, and a copy, of one frame of guest memory. */ unsigned long *page = NULL; int nraces = 0; @@ -1241,21 +1243,24 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, region_mfn[i] = ctx->hvm ? pfn : ctx->p2m[pfn]; } - /* Map relevant mfns */ - pfn_err = calloc(j, sizeof(*pfn_err)); - if ( pfn_err == NULL ) + if ( !callbacks || !callbacks->get_page) { - PERROR("allocation for pfn_err failed"); - return -1; - } - region_base = xc_map_foreign_bulk( - xch, dom, PROT_WRITE, region_mfn, pfn_err, j); + /* Map relevant mfns */ + pfn_err = calloc(j, sizeof(*pfn_err)); + if ( pfn_err == NULL ) + { + PERROR("allocation for pfn_err failed"); + return -1; + } + region_base = xc_map_foreign_bulk( + xch, dom, PROT_WRITE, region_mfn, pfn_err, j); - if ( region_base == NULL ) - { - PERROR("map batch failed"); - free(pfn_err); - return -1; + if ( region_base == NULL ) + { + PERROR("map batch failed"); + free(pfn_err); + return -1; + } } for ( i = 0, curpage = -1; i < j; i++ ) @@ -1279,7 +1284,7 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, continue; } - if (pfn_err[i]) + if ( (!callbacks || !callbacks->get_page) && pfn_err[i] ) { ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn %lx", pfn, region_mfn[i], ctx->p2m[pfn]); @@ -1298,8 +1303,20 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, mfn = ctx->p2m[pfn]; + if ( callbacks && callbacks->get_page ) + { + target_buf = callbacks->get_page(&callbacks->comm_data, + callbacks->data, pfn); + if ( !target_buf ) + { + ERROR("Cannot get a buffer to store memory"); + goto err_mapped; + } + } + else + target_buf = region_base + i*PAGE_SIZE; /* In verify mode, we use a copy; otherwise we work in place */ - page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE); + page = pagebuf->verify ? 
(void *)buf : target_buf; /* Remus - page decompression */ if (pagebuf->compressing) @@ -1357,27 +1374,26 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, if ( pagebuf->verify ) { - int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); + int res = memcmp(buf, target_buf, PAGE_SIZE); if ( res ) { int v; DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx " "actualcs=%08lx\n", pfn, pagebuf->pfn_types[pfn], - csum_page(region_base + (i + curbatch)*PAGE_SIZE), + csum_page(target_buf), csum_page(buf)); for ( v = 0; v < 4; v++ ) { - unsigned long *p = (unsigned long *) - (region_base + i*PAGE_SIZE); + unsigned long *p = (unsigned long *)target_buf; if ( buf[v] != p[v] ) DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); } } } - if ( !ctx->hvm && + if ( (!callbacks || !callbacks->get_page) && !ctx->hvm && xc_add_mmu_update(xch, mmu, (((unsigned long long)mfn) << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) ) @@ -1390,8 +1406,11 @@ static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, rc = nraces; err_mapped: - munmap(region_base, j*PAGE_SIZE); - free(pfn_err); + if ( !callbacks || !callbacks->get_page ) + { + munmap(region_base, j*PAGE_SIZE); + free(pfn_err); + } return rc; } @@ -1461,6 +1480,9 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, struct restore_ctx *ctx = &_ctx; struct domain_info_context *dinfo = &ctx->dinfo; + struct restore_data *comm_data = NULL; + void *data = NULL; + DPRINTF("%s: starting restore of new domid %u", __func__, dom); pagebuf_init(&pagebuf); @@ -1582,6 +1604,33 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, goto out; } + /* init callbacks->comm_data */ + if ( callbacks ) + { + callbacks->comm_data.xch = xch; + callbacks->comm_data.dom = dom; + callbacks->comm_data.dinfo = dinfo; + callbacks->comm_data.io_fd = io_fd; + callbacks->comm_data.hvm = hvm; + callbacks->comm_data.pfn_type = pfn_type; + callbacks->comm_data.mmu = mmu; + 
callbacks->comm_data.p2m_frame_list = p2m_frame_list; + callbacks->comm_data.p2m = ctx->p2m; + comm_data = &callbacks->comm_data; + + /* init callbacks->data */ + if ( callbacks->init) + { + callbacks->data = NULL; + if (callbacks->init(&callbacks->comm_data, &callbacks->data) < 0 ) + { + ERROR("Could not initialise restore callbacks private data"); + goto out; + } + } + data = callbacks->data; + } + xc_report_progress_start(xch, "Reloading memory pages", dinfo->p2m_size); /* @@ -1676,7 +1725,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, int brc; brc = apply_batch(xch, dom, ctx, region_mfn, pfn_type, - pae_extended_cr3, mmu, &pagebuf, curbatch); + pae_extended_cr3, mmu, &pagebuf, curbatch, + callbacks); if ( brc < 0 ) goto out; @@ -1761,6 +1811,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, goto finish; } +getpages: // DPRINTF("Buffered checkpoint\n"); if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) { @@ -1902,58 +1953,69 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, } } - /* - * Pin page tables. Do this after writing to them as otherwise Xen - * will barf when doing the type-checking. - */ - nr_pins = 0; - for ( i = 0; i < dinfo->p2m_size; i++ ) + if ( callbacks && callbacks->flush_memory ) { - if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 ) - continue; - - switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) + if ( callbacks->flush_memory(comm_data, data) < 0 ) { - case XEN_DOMCTL_PFINFO_L1TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; - break; + ERROR("Error doing callbacks->flush_memory()"); + goto out; + } + } + else + { + /* + * Pin page tables. Do this after writing to them as otherwise Xen + * will barf when doing the type-checking. 
+ */ + nr_pins = 0; + for ( i = 0; i < dinfo->p2m_size; i++ ) + { + if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 ) + continue; - case XEN_DOMCTL_PFINFO_L2TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; - break; + switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) + { + case XEN_DOMCTL_PFINFO_L1TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; + break; - case XEN_DOMCTL_PFINFO_L3TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; - break; + case XEN_DOMCTL_PFINFO_L2TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; + break; - case XEN_DOMCTL_PFINFO_L4TAB: - pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; - break; + case XEN_DOMCTL_PFINFO_L3TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; + break; - default: - continue; - } + case XEN_DOMCTL_PFINFO_L4TAB: + pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; + break; - pin[nr_pins].arg1.mfn = ctx->p2m[i]; - nr_pins++; + default: + continue; + } - /* Batch full? Then flush. */ - if ( nr_pins == MAX_PIN_BATCH ) - { - if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 ) + pin[nr_pins].arg1.mfn = ctx->p2m[i]; + nr_pins++; + + /* Batch full? Then flush. */ + if ( nr_pins == MAX_PIN_BATCH ) { - PERROR("Failed to pin batch of %d page tables", nr_pins); - goto out; + if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 ) + { + PERROR("Failed to pin batch of %d page tables", nr_pins); + goto out; + } + nr_pins = 0; } - nr_pins = 0; } - } - /* Flush final partial batch. */ - if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) ) - { - PERROR("Failed to pin batch of %d page tables", nr_pins); - goto out; + /* Flush final partial batch. 
*/ + if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) ) + { + PERROR("Failed to pin batch of %d page tables", nr_pins); + goto out; + } } DPRINTF("Memory reloaded (%ld pages)\n", ctx->nr_pfns); @@ -2052,6 +2114,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, *console_mfn = ctx->p2m[GET_FIELD(start_info, console.domU.mfn)]; SET_FIELD(start_info, console.domU.mfn, *console_mfn); SET_FIELD(start_info, console.domU.evtchn, console_evtchn); + callbacks->comm_data.store_mfn = *store_mfn; + callbacks->comm_data.console_mfn = *console_mfn; munmap(start_info, PAGE_SIZE); } /* Uncanonicalise each GDT frame number. */ @@ -2199,37 +2263,61 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, /* leave wallclock time. set by hypervisor */ munmap(new_shared_info, PAGE_SIZE); - /* Uncanonicalise the pfn-to-mfn table frame-number list. */ - for ( i = 0; i < P2M_FL_ENTRIES; i++ ) + if ( callbacks && callbacks->update_p2m ) { - pfn = p2m_frame_list[i]; - if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) ) + if ( callbacks->update_p2m(comm_data, data) < 0 ) { - ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn); + ERROR("Error doing callbacks->update_p2m()"); goto out; } - p2m_frame_list[i] = ctx->p2m[pfn]; } - - /* Copy the P2M we've constructed to the 'live' P2M */ - if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE, - p2m_frame_list, P2M_FL_ENTRIES)) ) + else { - PERROR("Couldn't map p2m table"); - goto out; + /* Uncanonicalise the pfn-to-mfn table frame-number list. 
*/ + for ( i = 0; i < P2M_FL_ENTRIES; i++ ) + { + pfn = p2m_frame_list[i]; + if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) ) + { + ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn); + goto out; + } + p2m_frame_list[i] = ctx->p2m[pfn]; + } + + /* Copy the P2M we've constructed to the 'live' P2M */ + if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE, + p2m_frame_list, P2M_FL_ENTRIES)) ) + { + PERROR("Couldn't map p2m table"); + goto out; + } + + /* If the domain we're restoring has a different word size to ours, + * we need to adjust the live_p2m assignment appropriately */ + if ( dinfo->guest_width > sizeof (xen_pfn_t) ) + for ( i = dinfo->p2m_size - 1; i >= 0; i-- ) + ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i]; + else if ( dinfo->guest_width < sizeof (xen_pfn_t) ) + for ( i = 0; i < dinfo->p2m_size; i++ ) + ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i]; + else + memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t)); + munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE); } - /* If the domain we're restoring has a different word size to ours, - * we need to adjust the live_p2m assignment appropriately */ - if ( dinfo->guest_width > sizeof (xen_pfn_t) ) - for ( i = dinfo->p2m_size - 1; i >= 0; i-- ) - ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i]; - else if ( dinfo->guest_width < sizeof (xen_pfn_t) ) - for ( i = 0; i < dinfo->p2m_size; i++ ) - ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i]; - else - memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t)); - munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE); + if ( callbacks && callbacks->finish_restotre ) + { + rc = callbacks->finish_restotre(comm_data, data); + if ( rc == 1 ) + goto getpages; + + if ( rc < 0 ) + { + ERROR("Er1ror doing callbacks->finish_restotre()"); + goto out; + } + } rc = xc_dom_gnttab_seed(xch, dom, *console_mfn, *store_mfn, console_domid, store_domid); @@ -2329,6 +2417,8 @@ int xc_domain_restore(xc_interface 
*xch, int io_fd, uint32_t dom, rc = 0; out: + if ( callbacks && callbacks->free && callbacks->data) + callbacks->free(&callbacks->comm_data, callbacks->data); if ( (rc != 0) && (dom != 0) ) xc_domain_destroy(xch, dom); xc_hypercall_buffer_free(xch, ctxt); diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h index 4714bd2..4bb444a 100644 --- a/tools/libxc/xenguest.h +++ b/tools/libxc/xenguest.h @@ -90,12 +90,60 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter unsigned long vm_generationid_addr); +/* pass the variable defined in xc_domain_restore() to callback. Use + * this structure for the following purpose: + * 1. avoid too many arguments. + * 2. different callback implemention may need different arguments. + * Just add the information you need here. + */ +struct restore_data +{ + xc_interface *xch; + uint32_t dom; + struct domain_info_context *dinfo; + int io_fd; + int hvm; + unsigned long *pfn_type; + struct xc_mmu *mmu; + unsigned long *p2m_frame_list; + unsigned long *p2m; + unsigned long console_mfn; + unsigned long store_mfn; +}; + /* callbacks provided by xc_domain_restore */ struct restore_callbacks { + /* callback to init data */ + int (*init)(struct restore_data *comm_data, void **data); + /* callback to free data */ + void (*free)(struct restore_data *comm_data, void *data); + /* callback to get a buffer to store memory data that is transfered + * from the source machine. + */ + char *(*get_page)(struct restore_data *comm_data, void *data, + unsigned long pfn); + /* callback to flush memory that is transfered from the source machine + * to the guest. Update the guest's pagetable if necessary. + */ + int (*flush_memory)(struct restore_data *comm_data, void *data); + /* callback to update the guest's p2m table */ + int (*update_p2m)(struct restore_data *comm_data, void *data); + /* callback to finish restore process. It is called before xc_domain_restore() + * returns. 
+ * + * Return value: + * -1: error + * 0: continue to start vm + * 1: continue to do a checkpoint + */ + int (*finish_restotre)(struct restore_data *comm_data, void *data); /* callback to restore toolstack specific data */ int (*toolstack_restore)(uint32_t domid, const uint8_t *buf, uint32_t size, void* data); + /* xc_domain_restore() init it */ + struct restore_data comm_data; + /* to be provided as the last argument to each callback function */ void* data; }; -- 1.7.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |