[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [Patch v6 13/13] tools/libxc: noarch restore code
Restore a domain from the new format. This reads and validates the domain and image header and loads the guest memory from the PAGE_DATA records, populating the p2m as it does so. This provides the xc_domain_restore2() function as an alternative to the existing xc_domain_restore(). Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- v6: * Fix error path with rc = 0. * Fix undefined memory issue with creating the populated_pfns array. --- tools/libxc/saverestore/common.h | 6 + tools/libxc/saverestore/restore.c | 628 ++++++++++++++++++++++++++++++++++++- 2 files changed, 633 insertions(+), 1 deletion(-) diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h index 4894cac..4840d3f 100644 --- a/tools/libxc/saverestore/common.h +++ b/tools/libxc/saverestore/common.h @@ -289,6 +289,12 @@ int arch_write_saving_cpu(struct xc_sr_context *ctx); */ int arch_handle_saving_cpu(struct xc_sr_context *ctx, struct xc_sr_record *rec); +/* TODO - find a better way of hiding this. It should be private to + * restore.c, but is needed by x86_pv_localise_page() + */ +int populate_pfns(struct xc_sr_context *ctx, unsigned count, + const xen_pfn_t *original_pfns, const uint32_t *types); + #endif /* * Local variables: diff --git a/tools/libxc/saverestore/restore.c b/tools/libxc/saverestore/restore.c index 6624baa..4123d3a 100644 --- a/tools/libxc/saverestore/restore.c +++ b/tools/libxc/saverestore/restore.c @@ -1,5 +1,570 @@ +#include <arpa/inet.h> + #include "common.h" +/* + * Read and validate the Image and Domain headers. 
+ */ +static int read_headers(struct xc_sr_context *ctx) +{ + xc_interface *xch = ctx->xch; + struct xc_sr_ihdr ihdr; + struct xc_sr_dhdr dhdr; + + if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) ) + { + PERROR("Failed to read Image Header from stream"); + return -1; + } + + ihdr.id = ntohl(ihdr.id); + ihdr.version = ntohl(ihdr.version); + ihdr.options = ntohs(ihdr.options); + + if ( ihdr.marker != IHDR_MARKER ) + { + ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker); + return -1; + } + else if ( ihdr.id != IHDR_ID ) + { + ERROR("Invalid ID: Expected 0x%08"PRIx32", Got 0x%08"PRIx32, + IHDR_ID, ihdr.id); + return -1; + } + else if ( ihdr.version != IHDR_VERSION ) + { + ERROR("Invalid Version: Expected %d, Got %d", ihdr.version, IHDR_VERSION); + return -1; + } + else if ( ihdr.options & IHDR_OPT_BIG_ENDIAN ) + { + ERROR("Unable to handle big endian streams"); + return -1; + } + + ctx->restore.format_version = ihdr.version; + + if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) ) + { + PERROR("Failed to read Domain Header from stream"); + return -1; + } + + ctx->restore.guest_type = dhdr.type; + ctx->restore.guest_page_size = (1U << dhdr.page_shift); + + if ( dhdr.xen_major == 0 ) + { + IPRINTF("Found %s domain, converted from legacy stream format", + dhdr_type_to_str(dhdr.type)); + DPRINTF(" Legacy conversion script version %u", dhdr.xen_minor); + } + else + IPRINTF("Found %s domain from Xen %u.%u", + dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor); + return 0; +} + +/** + * Reads a record from the stream, and fills in the record structure. + * + * Returns 0 on success and non-0 on failure. + * + * On success, the records type and size shall be valid. + * - If size is 0, data shall be NULL. + * - If size is non-0, data shall be a buffer allocated by malloc() which must + * be passed to free() by the caller. + * + * On failure, the contents of the record structure are undefined. 
+ */ +static int read_record(struct xc_sr_context *ctx, struct xc_sr_record *rec) +{ + xc_interface *xch = ctx->xch; + struct xc_sr_rhdr rhdr; + size_t datasz; + + if ( read_exact(ctx->fd, &rhdr, sizeof(rhdr)) ) + { + PERROR("Failed to read Record Header from stream"); + return -1; + } + else if ( rhdr.length > REC_LENGTH_MAX ) + { + ERROR("Record (0x%08"PRIx32", %s) length 0x%"PRIx32 + " exceeds max (0x%"PRIx32")", + rhdr.type, rec_type_to_str(rhdr.type), + rhdr.length, REC_LENGTH_MAX); + return -1; + } + + datasz = ROUNDUP(rhdr.length, REC_ALIGN_ORDER); + + if ( datasz ) + { + rec->data = malloc(datasz); + + if ( !rec->data ) + { + ERROR("Unable to allocate %zu bytes for record data (0x%08"PRIx32", %s)", + datasz, rhdr.type, rec_type_to_str(rhdr.type)); + return -1; + } + + if ( read_exact(ctx->fd, rec->data, datasz) ) + { + free(rec->data); + rec->data = NULL; + PERROR("Failed to read %zu bytes of data for record (0x%08"PRIx32", %s)", + datasz, rhdr.type, rec_type_to_str(rhdr.type)); + return -1; + } + } + else + rec->data = NULL; + + rec->type = rhdr.type; + rec->length = rhdr.length; + + return 0; +}; + +/* + * Is a pfn populated? + */ +static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn) +{ + if ( pfn > ctx->restore.max_populated_pfn ) + return false; + return test_bit(pfn, ctx->restore.populated_pfns); +} + +/* + * Set a pfn as populated, expanding the tracking structures if needed. To + * avoid realloc()ing too excessivly, the size increased to the nearest power + * of two large enough to contain the required pfn. + */ +static int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn) +{ + xc_interface *xch = ctx->xch; + + if ( pfn > ctx->restore.max_populated_pfn ) + { + xen_pfn_t new_max; + size_t old_sz, new_sz; + unsigned long *p; + + /* Round up to the nearest power of two larger than pfn, less 1. 
*/ + new_max = pfn; + new_max |= new_max >> 1; + new_max |= new_max >> 2; + new_max |= new_max >> 4; + new_max |= new_max >> 8; + new_max |= new_max >> 16; + if ( sizeof(xen_pfn_t) >= 8 ) + new_max |= new_max >> 32; + + old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1); + new_sz = bitmap_size(new_max + 1); + p = realloc(ctx->restore.populated_pfns, new_sz); + if ( !p ) + { + ERROR("Failed to realloc populated bitmap"); + errno = ENOMEM; + return -1; + } + + memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz); + + ctx->restore.populated_pfns = p; + ctx->restore.max_populated_pfn = new_max; + } + + set_bit(pfn, ctx->restore.populated_pfns); + + return 0; +} + +/* + * Given a set of pfns, obtain memory from Xen to fill the physmap for the + * unpopulated subset. + */ +int populate_pfns(struct xc_sr_context *ctx, unsigned count, + const xen_pfn_t *original_pfns, const uint32_t *types) +{ + xc_interface *xch = ctx->xch; + xen_pfn_t *mfns = malloc(count * sizeof(*mfns)), + *pfns = malloc(count * sizeof(*pfns)); + unsigned i, nr_pfns = 0; + int rc = -1; + + if ( !mfns || !pfns ) + { + ERROR("Failed to allocate %zu bytes for populating the physmap", + 2 * count * sizeof(*mfns)); + goto err; + } + + for ( i = 0; i < count; ++i ) + { + if ( types[i] != XEN_DOMCTL_PFINFO_XTAB && + types[i] != XEN_DOMCTL_PFINFO_BROKEN && + !pfn_is_populated(ctx, original_pfns[i]) ) + { + pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i]; + ++nr_pfns; + } + } + + if ( nr_pfns ) + { + rc = xc_domain_populate_physmap_exact(xch, ctx->domid, nr_pfns, 0, 0, mfns); + if ( rc ) + { + PERROR("Failed to populate physmap"); + goto err; + } + + for ( i = 0; i < nr_pfns; ++i ) + { + if ( mfns[i] == INVALID_MFN ) + { + ERROR("Populate physmap failed for pfn %u", i); + rc = -1; + goto err; + } + + rc = pfn_set_populated(ctx, pfns[i]); + if ( rc ) + goto err; + ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]); + } + } + + rc = 0; + + err: + free(pfns); + free(mfns); + + return rc; +} + +/* + * Given a 
list of pfns, their types, and a block of page data from the
 * stream, populate and record their types, map the relevant subset and copy
 * the data into the guest.
 *
 * page_data holds one PAGE_SIZE chunk for every entry whose type is not
 * XTAB, BROKEN or XALLOC, in record order.  In verify mode the incoming data
 * is compared against the guest page (a mismatch is only logged) instead of
 * being copied.
 *
 * Returns 0 on success and non-zero (after logging) on failure.
 */
static int process_page_data(struct xc_sr_context *ctx, unsigned count,
                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *to_map = malloc(count * sizeof(*to_map));
    int *errs = malloc(count * sizeof(*errs));
    void *base = NULL;      /* Start of the foreign mapping, for munmap(). */
    void *dst = NULL;       /* Guest page currently being filled. */
    unsigned idx;           /* Walks every pfn in the record. */
    unsigned slot = 0;      /* Walks the subset of pfns which got mapped. */
    unsigned nr_mapped = 0;
    int rc;

    if ( !to_map || !errs )
    {
        rc = -1;
        ERROR("Failed to allocate %zu bytes to process page data",
              count * (sizeof(*to_map) + sizeof(*errs)));
        goto out;
    }

    rc = populate_pfns(ctx, count, pfns, types);
    if ( rc )
    {
        ERROR("Failed to populate pfns for batch of %u pages", count);
        goto out;
    }

    /* Record every page's type, and gather the frames carrying data. */
    for ( idx = 0; idx < count; ++idx )
    {
        ctx->restore.ops.set_page_type(ctx, pfns[idx], types[idx]);

        switch ( types[idx] )
        {
        case XEN_DOMCTL_PFINFO_NOTAB:
        case XEN_DOMCTL_PFINFO_L1TAB:
        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:
        case XEN_DOMCTL_PFINFO_L2TAB:
        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:
        case XEN_DOMCTL_PFINFO_L3TAB:
        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:
        case XEN_DOMCTL_PFINFO_L4TAB:
        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:
            to_map[nr_mapped] = ctx->restore.ops.pfn_to_gfn(ctx, pfns[idx]);
            nr_mapped++;
            break;
        }
    }

    if ( nr_mapped > 0 )
    {
        base = xc_map_foreign_bulk(xch, ctx->domid, PROT_READ | PROT_WRITE,
                                   to_map, errs, nr_mapped);
        if ( !base )
        {
            rc = -1;
            PERROR("Unable to map %u mfns for %u pages of data",
                   nr_mapped, count);
            goto out;
        }
        dst = base;
    }

    for ( idx = 0; idx < count; ++idx )
    {
        const uint32_t type = types[idx];

        /* No page data to deal with. */
        if ( type == XEN_DOMCTL_PFINFO_XTAB ||
             type == XEN_DOMCTL_PFINFO_BROKEN ||
             type == XEN_DOMCTL_PFINFO_XALLOC )
            continue;

        if ( errs[slot] )
        {
            rc = -1;
            ERROR("Mapping pfn %lx (mfn %lx, type %#"PRIx32")failed with %d",
                  pfns[idx], to_map[slot], types[idx], errs[slot]);
            goto out;
        }

        /* Undo page normalisation done by the saver. */
        rc = ctx->restore.ops.localise_page(ctx, types[idx], page_data);
        if ( rc )
        {
            DPRINTF("Failed to localise");
            goto out;
        }

        if ( !ctx->restore.verify )
        {
            /* Regular mode - copy incoming data into place. */
            memcpy(dst, page_data, PAGE_SIZE);
        }
        else if ( memcmp(dst, page_data, PAGE_SIZE) )
        {
            /* Verify mode - compare incoming data to what we already have. */
            ERROR("verify pfn %lx failed (type %#x)",
                  pfns[idx], types[idx] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
        }

        /* Step to the next mapped page and the next chunk of stream data. */
        ++slot;
        dst += PAGE_SIZE;
        page_data += PAGE_SIZE;
    }

    rc = 0;

 out:
    if ( base )
        munmap(base, nr_mapped * PAGE_SIZE);

    free(errs);
    free(to_map);

    return rc;
}

/*
 * Validate a PAGE_DATA record from the stream, and pass the results to
 * process_page_data() to actually perform the legwork.
+ */ +static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec) +{ + xc_interface *xch = ctx->xch; + struct xc_sr_rec_page_data_header *pages = rec->data; + unsigned i, pages_of_data = 0; + int rc = -1; + + xen_pfn_t *pfns = NULL, pfn; + uint32_t *types = NULL, type; + + if ( rec->length < sizeof(*pages) ) + { + ERROR("PAGE_DATA record truncated: length %"PRIu32", min %zu", + rec->length, sizeof(*pages)); + goto err; + } + else if ( pages->count < 1 ) + { + ERROR("Expected at least 1 pfn in PAGE_DATA record"); + goto err; + } + else if ( rec->length < sizeof(*pages) + (pages->count * sizeof(uint64_t)) ) + { + ERROR("PAGE_DATA record (length %"PRIu32") too short to contain %" + PRIu32" pfns worth of information", rec->length, pages->count); + goto err; + } + + pfns = malloc(pages->count * sizeof(*pfns)); + types = malloc(pages->count * sizeof(*types)); + if ( !pfns || !types ) + { + ERROR("Unable to allocate enough memory for %"PRIu32" pfns", + pages->count); + goto err; + } + + for ( i = 0; i < pages->count; ++i ) + { + pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK; + if ( !ctx->restore.ops.pfn_is_valid(ctx, pfn) ) + { + ERROR("pfn %#lx (index %u) outside domain maximum", pfn, i); + goto err; + } + + type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32; + if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) && + ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) ) + { + ERROR("Invalid type %#"PRIx32" for pfn %#lx (index %u)", type, pfn, i); + goto err; + } + else if ( type < XEN_DOMCTL_PFINFO_BROKEN ) + /* NOTAB and all L1 thru L4 tables (including pinned) should have + * a page worth of data in the record. 
*/ + pages_of_data++; + + pfns[i] = pfn; + types[i] = type; + } + + if ( rec->length != (sizeof(*pages) + + (sizeof(uint64_t) * pages->count) + + (PAGE_SIZE * pages_of_data)) ) + { + ERROR("PAGE_DATA record wrong size: length %"PRIu32", expected " + "%zu + %zu + %zu", rec->length, sizeof(*pages), + (sizeof(uint64_t) * pages->count), (PAGE_SIZE * pages_of_data)); + goto err; + } + + rc = process_page_data(ctx, pages->count, pfns, types, + &pages->pfn[pages->count]); + err: + free(types); + free(pfns); + + return rc; +} + +/* + * Restore a domain. + */ +static int restore(struct xc_sr_context *ctx) +{ + xc_interface *xch = ctx->xch; + struct xc_sr_record rec; + int rc, saved_rc = 0, saved_errno = 0; + + IPRINTF("Restoring domain"); + + rc = ctx->restore.ops.setup(ctx); + if ( rc ) + goto err; + + ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1; + ctx->restore.populated_pfns = bitmap_alloc( + ctx->restore.max_populated_pfn + 1); + if ( !ctx->restore.populated_pfns ) + { + ERROR("Unable to allocate memory for populated_pfns bitmap"); + goto err; + } + + do + { + rc = read_record(ctx, &rec); + if ( rc ) + goto err; + + switch ( rec.type ) + { + case REC_TYPE_END: + DPRINTF("End record"); + break; + + case REC_TYPE_PAGE_DATA: + rc = handle_page_data(ctx, &rec); + break; + + case REC_TYPE_VERIFY: + DPRINTF("Verify mode enabled"); + ctx->restore.verify = true; + break; + + case REC_TYPE_SAVING_CPU: + rc = arch_handle_saving_cpu(ctx, &rec); + break; + + default: + rc = ctx->restore.ops.process_record(ctx, &rec); + break; + } + + free(rec.data); + + if ( rc == RECORD_NOT_PROCESSED ) + { + if ( rec.type & REC_TYPE_OPTIONAL ) + DPRINTF("Ignoring optional record %#x (%s)", + rec.type, rec_type_to_str(rec.type)); + else + { + ERROR("Manditory record %#x (%s) not handled", + rec.type, rec_type_to_str(rec.type)); + rc = -1; + } + } + + if ( rc ) + goto err; + + } while ( rec.type != REC_TYPE_END ); + + rc = ctx->restore.ops.stream_complete(ctx); + if ( rc ) + goto err; + + 
IPRINTF("Restore successful"); + goto done; + + err: + saved_errno = errno; + saved_rc = rc; + PERROR("Restore failed"); + + done: + free(ctx->restore.populated_pfns); + rc = ctx->restore.ops.cleanup(ctx); + if ( rc ) + PERROR("Failed to clean up"); + + if ( saved_rc ) + { + rc = saved_rc; + errno = saved_errno; + } + + return rc; +} + int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom, unsigned int store_evtchn, unsigned long *store_mfn, domid_t store_domid, unsigned int console_evtchn, @@ -8,8 +573,69 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom, int checkpointed_stream, struct restore_callbacks *callbacks) { + struct xc_sr_context ctx = + { + .xch = xch, + .fd = io_fd, + }; + + /* GCC 4.4 (of CentOS 6.x vintage) can' t initialise anonymous unions :( */ + ctx.restore.console_evtchn = console_evtchn; + ctx.restore.console_domid = console_domid; + ctx.restore.xenstore_evtchn = store_evtchn; + ctx.restore.xenstore_domid = store_domid; + ctx.restore.callbacks = callbacks; + IPRINTF("In experimental %s", __func__); - return -1; + DPRINTF("fd %d, dom %"PRIu32", hvm %u, pae %u, superpages %d" + ", checkpointed_stream %d", io_fd, dom, hvm, pae, + superpages, checkpointed_stream); + + if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 ) + { + PERROR("Failed to get domain info"); + return -1; + } + + if ( ctx.dominfo.domid != dom ) + { + ERROR("Domain %"PRIu32" does not exist", dom); + return -1; + } + + ctx.domid = dom; + IPRINTF("Restoring domain %"PRIu32, dom); + + if ( read_headers(&ctx) ) + return -1; + + if ( ctx.dominfo.hvm ) + { + ctx.restore.ops = restore_ops_x86_hvm; + if ( restore(&ctx) ) + return -1; + } + else + { + ctx.restore.ops = restore_ops_x86_pv; + if ( restore(&ctx) ) + return -1; + } + + DPRINTF("XenStore: mfn %#lx, dom %d, evt %u", + ctx.restore.xenstore_mfn, + ctx.restore.xenstore_domid, + ctx.restore.xenstore_evtchn); + + DPRINTF("Console: mfn %#lx, dom %d, evt %u", + ctx.restore.console_mfn, + 
ctx.restore.console_domid, + ctx.restore.console_evtchn); + + *console_mfn = ctx.restore.console_mfn; + *store_mfn = ctx.restore.xenstore_mfn; + + return 0; } /* -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |