[Xen-devel] [PATCH v4 5/9] tools/libxc: common code
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx>
Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
 tools/libxc/saverestore/common.c         |   87 ++++++
 tools/libxc/saverestore/common.h         |  172 ++++++++++++
 tools/libxc/saverestore/common_x86.c     |   54 ++++
 tools/libxc/saverestore/common_x86.h     |   21 ++
 tools/libxc/saverestore/common_x86_hvm.c |   53 ++++
 tools/libxc/saverestore/common_x86_pv.c  |  431 ++++++++++++++++++++++++++++++
 tools/libxc/saverestore/common_x86_pv.h  |  104 +++++++
 tools/libxc/saverestore/restore.c        |  288 ++++++++++++++++++++
 tools/libxc/saverestore/save.c           |   42 +++
 9 files changed, 1252 insertions(+)
 create mode 100644 tools/libxc/saverestore/common_x86.c
 create mode 100644 tools/libxc/saverestore/common_x86.h
 create mode 100644 tools/libxc/saverestore/common_x86_hvm.c
 create mode 100644 tools/libxc/saverestore/common_x86_pv.c
 create mode 100644 tools/libxc/saverestore/common_x86_pv.h

diff --git a/tools/libxc/saverestore/common.c b/tools/libxc/saverestore/common.c
index de2e727..b159c4c 100644
--- a/tools/libxc/saverestore/common.c
+++ b/tools/libxc/saverestore/common.c
@@ -1,3 +1,5 @@
+#include <assert.h>
+
 #include "common.h"
 
 static const char *dhdr_types[] =
@@ -52,6 +54,91 @@ const char *rec_type_to_str(uint32_t type)
     return "Reserved";
 }
 
+int write_split_record(struct context *ctx, struct record *rec,
+                       void *buf, size_t sz)
+{
+    static const char zeroes[7] = { 0 };
+    xc_interface *xch = ctx->xch;
+    uint32_t combined_length = rec->length + sz;
+    size_t record_length = (combined_length + 7) & ~7UL;
+
+    if ( record_length > REC_LENGTH_MAX )
+    {
+        ERROR("Record (0x%08"PRIx32", %s) length 0x%"PRIx32
+              " exceeds max (0x%"PRIx32")", rec->type,
+              rec_type_to_str(rec->type), rec->length, REC_LENGTH_MAX);
+        return -1;
+    }
+
+    if ( rec->length )
+        assert(rec->data);
+    if ( sz )
+        assert(buf);
+
+    if ( write_exact(ctx->fd, &rec->type, sizeof rec->type) ||
+         write_exact(ctx->fd, &combined_length, sizeof rec->length) ||
+         (rec->length && write_exact(ctx->fd, rec->data, rec->length)) ||
+         (sz && write_exact(ctx->fd, buf, sz)) ||
+         write_exact(ctx->fd, zeroes, record_length - combined_length) )
+    {
+        PERROR("Unable to write record to stream");
+        return -1;
+    }
+
+    return 0;
+}
+
+int read_record(struct context *ctx, struct record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct rhdr rhdr;
+    size_t datasz;
+
+    if ( read_exact(ctx->fd, &rhdr, sizeof rhdr) )
+    {
+        PERROR("Failed to read Record Header from stream");
+        return -1;
+    }
+    else if ( rhdr.length > REC_LENGTH_MAX )
+    {
+        ERROR("Record (0x%08"PRIx32", %s) length 0x%"PRIx32
+              " exceeds max (0x%"PRIx32")",
+              rhdr.type, rec_type_to_str(rhdr.type),
+              rhdr.length, REC_LENGTH_MAX);
+        return -1;
+    }
+
+    datasz = (rhdr.length + 7) & ~7U;
+
+    if ( datasz )
+    {
+        rec->data = malloc(datasz);
+
+        if ( !rec->data )
+        {
+            ERROR("Unable to allocate %zu bytes for record data (0x%08"PRIx32", %s)",
+                  datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            return -1;
+        }
+
+        if ( read_exact(ctx->fd, rec->data, datasz) )
+        {
+            free(rec->data);
+            rec->data = NULL;
+            PERROR("Failed to read %zu bytes of data for record (0x%08"PRIx32", %s)",
+                   datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            return -1;
+        }
+    }
+    else
+        rec->data = NULL;
+
+    rec->type = rhdr.type;
+    rec->length = rhdr.length;
+
+    return 0;
+}
+
 /*
  * Local variables:
  * mode: C
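A note on the framing for reviewers: every record is an 8 byte header (type,
length) followed by the payload zero-padded to the next 8 byte boundary, so
(length + 7) & ~7 gives the on-wire payload size - a 13 byte payload occupies
16 bytes with 3 bytes of zeroes appended.  A minimal caller-side sketch
(REC_TYPE_example is hypothetical, for illustration only; real type values
come from stream_format.h):

    /* Sketch: emit a record whose payload is a single uint64_t. */
    static int write_example_record(struct context *ctx, uint64_t val)
    {
        struct record rec =
        {
            .type   = REC_TYPE_example, /* hypothetical type */
            .length = sizeof val,
            .data   = &val,
        };

        return write_record(ctx, &rec); /* pads to an 8 byte boundary */
    }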
diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h
index fff0a39..a35eda7 100644
--- a/tools/libxc/saverestore/common.h
+++ b/tools/libxc/saverestore/common.h
@@ -1,7 +1,20 @@
 #ifndef __COMMON__H
 #define __COMMON__H
 
+#include <stdbool.h>
+
+/* Hack out junk from the namespace. */
+#define mfn_to_pfn __UNUSED_mfn_to_pfn
+#define pfn_to_mfn __UNUSED_pfn_to_mfn
+
 #include "../xg_private.h"
+#include "../xg_save_restore.h"
+#include "../xc_dom.h"
+#include "../xc_bitops.h"
+
+#undef mfn_to_pfn
+#undef pfn_to_mfn
+
 #include "stream_format.h"
 
@@ -11,6 +24,165 @@ const char *dhdr_type_to_str(uint32_t type);
 const char *rec_type_to_str(uint32_t type);
 
+struct context;
+
+struct save_restore_ops
+{
+    bool (*pfn_is_valid)(struct context *ctx, xen_pfn_t pfn);
+    xen_pfn_t (*pfn_to_gfn)(struct context *ctx, xen_pfn_t pfn);
+    void (*set_gfn)(struct context *ctx, xen_pfn_t pfn, xen_pfn_t gfn);
+    void (*set_page_type)(struct context *ctx, xen_pfn_t pfn, xen_pfn_t type);
+    int (*normalise_page)(struct context *ctx, xen_pfn_t type, void **page);
+    int (*localise_page)(struct context *ctx, uint32_t type, void *page);
+};
+
+struct context
+{
+    xc_interface *xch;
+    uint32_t domid;
+    int fd;
+
+    xc_dominfo_t dominfo;
+
+    struct save_restore_ops ops;
+
+    union
+    {
+        struct
+        {
+            /* From Image Header */
+            uint32_t format_version;
+
+            /* From Domain Header */
+            uint32_t guest_type;
+            uint32_t guest_page_size;
+
+            unsigned long xenstore_mfn, console_mfn;
+            unsigned int xenstore_evtchn, console_evtchn;
+            domid_t xenstore_domid, console_domid;
+
+            struct restore_callbacks *callbacks;
+
+            /* Bitmap of currently populated PFNs during restore. */
+            unsigned long *populated_pfns;
+            unsigned int max_populated_pfn;
+        } restore;
+
+        struct
+        {
+            unsigned long p2m_size;
+
+            struct save_callbacks *callbacks;
+        } save;
+    };
+
+    xen_pfn_t *batch_pfns;
+    unsigned nr_batch_pfns;
+    unsigned long *deferred_pages;
+
+    union
+    {
+        struct
+        {
+            /* 4 or 8; 32 or 64 bit domain */
+            unsigned int width;
+            /* 3 or 4 pagetable levels */
+            unsigned int levels;
+
+            /* Maximum Xen frame */
+            unsigned long max_mfn;
+            /* Read-only machine to phys map */
+            xen_pfn_t *m2p;
+            /* First mfn of the compat m2p (only needed for 32bit PV guests) */
+            xen_pfn_t compat_m2p_mfn0;
+            /* Number of m2p frames mapped */
+            unsigned long nr_m2p_frames;
+
+            /* Maximum guest frame */
+            unsigned long max_pfn;
+            /* Frames per page in guest p2m */
+            unsigned int fpp;
+
+            /* Number of frames making up the p2m */
+            unsigned int p2m_frames;
+            /* Guest's phys to machine map.  Mapped read-only (save) or
+             * allocated locally (restore).  Uses guest unsigned longs. */
+            void *p2m;
+            /* The guest pfns containing the p2m leaves */
+            xen_pfn_t *p2m_pfns;
+            /* Types for each page */
+            uint32_t *pfn_types;
+
+            /* Read-only mapping of the guest's shared info page */
+            shared_info_any_t *shinfo;
+        } x86_pv;
+    };
+};
+
+/*
+ * Write the image and domain headers to the stream.
+ * (to eventually make static in save.c)
+ */
+int write_headers(struct context *ctx, uint16_t guest_type);
+
+extern struct save_restore_ops save_restore_ops_x86_pv;
+extern struct save_restore_ops save_restore_ops_x86_hvm;
+
+struct record
+{
+    uint32_t type;
+    uint32_t length;
+    void *data;
+};
+
+/*
+ * Writes a split record to the stream, applying correct padding where
+ * appropriate.  It is common when sending records containing blobs from Xen
+ * that the header and blob data are separate.  This function accepts a second
+ * buffer and length, and will merge it with the main record when sending.
+ *
+ * Records with a non-zero length must provide a valid data field; records
+ * with a 0 length shall have their data field ignored.
+ *
+ * Returns 0 on success and non-0 on failure.
+ */
+int write_split_record(struct context *ctx, struct record *rec,
+                       void *buf, size_t sz);
+
+/*
+ * Writes a record to the stream, applying correct padding where appropriate.
+ * Records with a non-zero length must provide a valid data field; records
+ * with a 0 length shall have their data field ignored.
+ *
+ * Returns 0 on success and non-0 on failure.
+ */
+static inline int write_record(struct context *ctx, struct record *rec)
+{
+    return write_split_record(ctx, rec, NULL, 0);
+}
+
+/*
+ * Reads a record from the stream, and fills in the record structure.
+ *
+ * Returns 0 on success and non-0 on failure.
+ *
+ * On success, the record's type and size shall be valid.
+ * - If size is 0, data shall be NULL.
+ * - If size is non-0, data shall be a buffer allocated by malloc() which must
+ *   be passed to free() by the caller.
+ *
+ * On failure, the contents of the record structure are undefined.
+ */
+int read_record(struct context *ctx, struct record *rec);
+
+int write_page_data_and_pause(struct context *ctx);
+
+int handle_page_data(struct context *ctx, struct record *rec);
+
+int populate_pfns(struct context *ctx, unsigned count,
+                  const xen_pfn_t *original_pfns, const uint32_t *types);
+
 #endif
 /*
  * Local variables:
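The intended consumption pattern for these helpers on the restore side is a
dispatch loop over read_record(); a sketch only (the real loop arrives later
in the series, and REC_TYPE_page_data is assumed from stream_format.h):

    /* Sketch: restore-side record dispatch. */
    struct record rec;
    int rc;

    if ( read_record(ctx, &rec) )
        return -1;

    switch ( rec.type )
    {
    case REC_TYPE_page_data:
        rc = handle_page_data(ctx, &rec);
        break;
    default:
        rc = -1;
        break;
    }

    free(rec.data); /* read_record() hands buffer ownership to the caller */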
diff --git a/tools/libxc/saverestore/common_x86.c b/tools/libxc/saverestore/common_x86.c
new file mode 100644
index 0000000..0a3d555
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86.c
@@ -0,0 +1,54 @@
+#include "common_x86.h"
+
+int write_tsc_info(struct context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    struct rec_tsc_info tsc = { 0 };
+    struct record rec =
+    {
+        .type = REC_TYPE_tsc_info,
+        .length = sizeof tsc,
+        .data = &tsc
+    };
+
+    if ( xc_domain_get_tsc_info(xch, ctx->domid, &tsc.mode,
+                                &tsc.nsec, &tsc.khz, &tsc.incarnation) < 0 )
+    {
+        PERROR("Unable to obtain TSC information");
+        return -1;
+    }
+
+    return write_record(ctx, &rec);
+}
+
+int handle_tsc_info(struct context *ctx, struct record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct rec_tsc_info *tsc = rec->data;
+
+    if ( rec->length != sizeof *tsc )
+    {
+        ERROR("TSC_INFO record wrong size: length %"PRIu32", expected %zu",
+              rec->length, sizeof *tsc);
+        return -1;
+    }
+
+    if ( xc_domain_set_tsc_info(xch, ctx->domid, tsc->mode,
+                                tsc->nsec, tsc->khz, tsc->incarnation) )
+    {
+        PERROR("Unable to set TSC information");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxc/saverestore/common_x86.h b/tools/libxc/saverestore/common_x86.h
new file mode 100644
index 0000000..429532a
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86.h
@@ -0,0 +1,21 @@
+#ifndef __COMMON_X86__H
+#define __COMMON_X86__H
+
+#include "common.h"
+
+/* Obtains and writes domain TSC information to the stream. */
+int write_tsc_info(struct context *ctx);
+
+/* Parses domain TSC information from the stream. */
+int handle_tsc_info(struct context *ctx, struct record *rec);
+
+#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
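write_tsc_info() and handle_tsc_info() are exact inverses across the stream;
both depend on the rec_tsc_info layout from the stream format patch earlier
in this series, which (for reference, and subject to that patch) looks like:

    /* Assumed layout of the TSC_INFO record body (see the spec patch). */
    struct rec_tsc_info
    {
        uint32_t mode;
        uint32_t khz;
        uint64_t nsec;
        uint32_t incarnation;
        uint32_t _res1;
    };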
true; +} + +static xen_pfn_t x86_hvm_pfn_to_gfn(struct context *ctx, xen_pfn_t pfn) +{ + return pfn; +} + +static void x86_hvm_set_gfn(struct context *ctx, xen_pfn_t pfn, + xen_pfn_t gfn) +{ + /* no op */ +} + +static void x86_hvm_set_page_type(struct context *ctx, xen_pfn_t pfn, xen_pfn_t type) +{ + /* no-op */ +} + +static int x86_hvm_normalise_page(struct context *ctx, xen_pfn_t type, void **page) +{ + /* no-op */ + return 0; +} + +static int x86_hvm_localise_page(struct context *ctx, uint32_t type, void *page) +{ + /* no-op */ + return 0; +} + +struct save_restore_ops save_restore_ops_x86_hvm = { + .pfn_is_valid = x86_hvm_pfn_is_valid, + .pfn_to_gfn = x86_hvm_pfn_to_gfn, + .set_gfn = x86_hvm_set_gfn, + .set_page_type = x86_hvm_set_page_type, + .normalise_page = x86_hvm_normalise_page, + .localise_page = x86_hvm_localise_page +}; + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/tools/libxc/saverestore/common_x86_pv.c b/tools/libxc/saverestore/common_x86_pv.c new file mode 100644 index 0000000..35bce27 --- /dev/null +++ b/tools/libxc/saverestore/common_x86_pv.c @@ -0,0 +1,431 @@ +#include <assert.h> + +#include "common_x86_pv.h" + +xen_pfn_t mfn_to_pfn(struct context *ctx, xen_pfn_t mfn) +{ + assert(mfn <= ctx->x86_pv.max_mfn); + return ctx->x86_pv.m2p[mfn]; +} + +static bool x86_pv_pfn_is_valid(struct context *ctx, xen_pfn_t pfn) +{ + return pfn <= ctx->x86_pv.max_pfn; +} + +static xen_pfn_t x86_pv_pfn_to_gfn(struct context *ctx, xen_pfn_t pfn) +{ + assert(pfn <= ctx->x86_pv.max_pfn); + + if ( ctx->x86_pv.width == sizeof (uint64_t) ) + /* 64 bit guest. Need to truncate their pfns for 32 bit toolstacks */ + return ((uint64_t *)ctx->x86_pv.p2m)[pfn]; + else + { + /* 32 bit guest. Need to expand INVALID_MFN fot 64 bit toolstacks */ + uint32_t mfn = ((uint32_t *)ctx->x86_pv.p2m)[pfn]; + + return mfn == ~0U ? INVALID_MFN : mfn; + } +} + +static void x86_pv_set_page_type(struct context *ctx, xen_pfn_t pfn, + unsigned long type) +{ + assert(pfn <= ctx->x86_pv.max_pfn); + + ctx->x86_pv.pfn_types[pfn] = type; +} + +static void x86_pv_set_gfn(struct context *ctx, xen_pfn_t pfn, + xen_pfn_t mfn) +{ + assert(pfn <= ctx->x86_pv.max_pfn); + + if ( ctx->x86_pv.width == sizeof (uint64_t) ) + /* 64 bit guest. Need to expand INVALID_MFN for 32 bit toolstacks */ + ((uint64_t *)ctx->x86_pv.p2m)[pfn] = mfn == INVALID_MFN ? ~0ULL : mfn; + else + /* 32 bit guest. Can safely truncate INVALID_MFN fot 64 bit toolstacks */ + ((uint32_t *)ctx->x86_pv.p2m)[pfn] = mfn; +} + +static int normalise_pagetable(struct context *ctx, const uint64_t *src, + uint64_t *dst, unsigned long type) +{ + xc_interface *xch = ctx->xch; + uint64_t pte; + unsigned i, xen_first = -1, xen_last = -1; /* Indicies of Xen mappings */ + + type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; + + if ( ctx->x86_pv.levels == 4 ) + { + /* 64bit guests only have Xen mappings in their L4 tables */ + if ( type == XEN_DOMCTL_PFINFO_L4TAB ) + { + xen_first = 256; + xen_last = 271; + } + } + else + { + switch ( type ) + { + case XEN_DOMCTL_PFINFO_L4TAB: + ERROR("??? Found L4 table for 32bit guest"); + errno = EINVAL; + return -1; + + case XEN_DOMCTL_PFINFO_L3TAB: + /* 32bit guests can only use the first 4 entries of their L3 tables. + * All other are potentially used by Xen. */ + xen_first = 4; + xen_last = 512; + break; + + case XEN_DOMCTL_PFINFO_L2TAB: + /* It is hard to spot Xen mappings in a 32bit guest's L2. 
diff --git a/tools/libxc/saverestore/common_x86_pv.c b/tools/libxc/saverestore/common_x86_pv.c
new file mode 100644
index 0000000..35bce27
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86_pv.c
@@ -0,0 +1,431 @@
+#include <assert.h>
+
+#include "common_x86_pv.h"
+
+xen_pfn_t mfn_to_pfn(struct context *ctx, xen_pfn_t mfn)
+{
+    assert(mfn <= ctx->x86_pv.max_mfn);
+    return ctx->x86_pv.m2p[mfn];
+}
+
+static bool x86_pv_pfn_is_valid(struct context *ctx, xen_pfn_t pfn)
+{
+    return pfn <= ctx->x86_pv.max_pfn;
+}
+
+static xen_pfn_t x86_pv_pfn_to_gfn(struct context *ctx, xen_pfn_t pfn)
+{
+    assert(pfn <= ctx->x86_pv.max_pfn);
+
+    if ( ctx->x86_pv.width == sizeof (uint64_t) )
+        /* 64 bit guest.  Need to truncate their pfns for 32 bit toolstacks. */
+        return ((uint64_t *)ctx->x86_pv.p2m)[pfn];
+    else
+    {
+        /* 32 bit guest.  Need to expand INVALID_MFN for 64 bit toolstacks. */
+        uint32_t mfn = ((uint32_t *)ctx->x86_pv.p2m)[pfn];
+
+        return mfn == ~0U ? INVALID_MFN : mfn;
+    }
+}
+
+static void x86_pv_set_page_type(struct context *ctx, xen_pfn_t pfn,
+                                 xen_pfn_t type)
+{
+    assert(pfn <= ctx->x86_pv.max_pfn);
+
+    ctx->x86_pv.pfn_types[pfn] = type;
+}
+
+static void x86_pv_set_gfn(struct context *ctx, xen_pfn_t pfn,
+                           xen_pfn_t mfn)
+{
+    assert(pfn <= ctx->x86_pv.max_pfn);
+
+    if ( ctx->x86_pv.width == sizeof (uint64_t) )
+        /* 64 bit guest.  Need to expand INVALID_MFN for 32 bit toolstacks. */
+        ((uint64_t *)ctx->x86_pv.p2m)[pfn] = mfn == INVALID_MFN ? ~0ULL : mfn;
+    else
+        /* 32 bit guest.  Can safely truncate INVALID_MFN for 64 bit
+         * toolstacks. */
+        ((uint32_t *)ctx->x86_pv.p2m)[pfn] = mfn;
+}
+
+static int normalise_pagetable(struct context *ctx, const uint64_t *src,
+                               uint64_t *dst, unsigned long type)
+{
+    xc_interface *xch = ctx->xch;
+    uint64_t pte;
+    unsigned i, xen_first = -1, xen_last = -1; /* Indices of Xen mappings. */
+
+    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+    if ( ctx->x86_pv.levels == 4 )
+    {
+        /* 64bit guests only have Xen mappings in their L4 tables. */
+        if ( type == XEN_DOMCTL_PFINFO_L4TAB )
+        {
+            xen_first = 256;
+            xen_last = 271;
+        }
+    }
+    else
+    {
+        switch ( type )
+        {
+        case XEN_DOMCTL_PFINFO_L4TAB:
+            ERROR("??? Found L4 table for 32bit guest");
+            errno = EINVAL;
+            return -1;
+
+        case XEN_DOMCTL_PFINFO_L3TAB:
+            /* 32bit guests can only use the first 4 entries of their L3
+             * tables.  All others are potentially used by Xen. */
+            xen_first = 4;
+            xen_last = 512;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+            /* It is hard to spot Xen mappings in a 32bit guest's L2.  Most
+             * are normal but only a few will have Xen mappings.
+             *
+             * 428 = (HYPERVISOR_VIRT_START_PAE >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff
+             *
+             * ...which is conveniently unavailable to us in a 64bit build.
+             */
+            if ( pte_to_frame(ctx, src[428]) == ctx->x86_pv.compat_m2p_mfn0 )
+            {
+                xen_first = 428;
+                xen_last = 512;
+            }
+            break;
+        }
+    }
+
+    for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
+    {
+        xen_pfn_t mfn, pfn;
+
+        pte = src[i];
+
+        /* Remove Xen mappings: Xen will reconstruct on the other side. */
+        if ( i >= xen_first && i <= xen_last )
+            pte = 0;
+
+        if ( pte & _PAGE_PRESENT )
+        {
+            mfn = pte_to_frame(ctx, pte);
+
+            if ( pte & _PAGE_PSE )
+            {
+                ERROR("Cannot migrate superpage (L%lu[%u]: 0x%016"PRIx64")",
+                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
+                errno = E2BIG;
+                return -1;
+            }
+
+            if ( !mfn_in_pseudophysmap(ctx, mfn) )
+            {
+                /* This is expected during the live part of migration given
+                 * split pagetable updates, active grant mappings etc.  The
+                 * pagetable will need to be resent after pausing.  It is
+                 * however fatal if we have already paused the domain. */
+                if ( !ctx->dominfo.paused )
+                    errno = EAGAIN;
+                else
+                {
+                    ERROR("Bad MFN for L%lu[%u]",
+                          type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i);
+                    pseudophysmap_walk(ctx, mfn);
+                    errno = ERANGE;
+                }
+                return -1;
+            }
+            else
+                pfn = mfn_to_pfn(ctx, mfn);
+
+            update_pte(ctx, &pte, pfn);
+        }
+
+        dst[i] = pte;
+    }
+
+    return 0;
+}
+
+static int x86_pv_normalise_page(struct context *ctx, xen_pfn_t type,
+                                 void **page)
+{
+    xc_interface *xch = ctx->xch;
+    void *local_page;
+    int rc;
+
+    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+    if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
+        return 0;
+
+    local_page = malloc(PAGE_SIZE);
+    if ( !local_page )
+    {
+        ERROR("Unable to allocate scratch page");
+        rc = -1;
+        goto out;
+    }
+
+    rc = normalise_pagetable(ctx, *page, local_page, type);
+    *page = local_page;
+
+ out:
+    return rc;
+}
+
+static int x86_pv_localise_page(struct context *ctx, uint32_t type, void *page)
+{
+    xc_interface *xch = ctx->xch;
+    uint64_t *table = page;
+    uint64_t pte;
+    unsigned i;
+
+    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+    /* Only page tables need localisation. */
+    if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
+        return 0;
+
+    for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
+    {
+        pte = table[i];
+
+        if ( pte & _PAGE_PRESENT )
+        {
+            xen_pfn_t mfn, pfn;
+
+            pfn = pte_to_frame(ctx, pte);
+            mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+
+            if ( mfn == INVALID_MFN )
+            {
+                if ( populate_pfns(ctx, 1, &pfn, &type) )
+                    return -1;
+
+                mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+            }
+
+            if ( !mfn_in_pseudophysmap(ctx, mfn) )
+            {
+                ERROR("Bad MFN for L%lu[%u]",
+                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i);
+                pseudophysmap_walk(ctx, mfn);
+                errno = ERANGE;
+                return -1;
+            }
+
+            update_pte(ctx, &pte, mfn);
+
+            table[i] = pte;
+        }
+    }
+
+    return 0;
+}
+
+struct save_restore_ops save_restore_ops_x86_pv = {
+    .pfn_is_valid = x86_pv_pfn_is_valid,
+    .pfn_to_gfn = x86_pv_pfn_to_gfn,
+    .set_page_type = x86_pv_set_page_type,
+    .set_gfn = x86_pv_set_gfn,
+    .normalise_page = x86_pv_normalise_page,
+    .localise_page = x86_pv_localise_page,
+};
+
+bool mfn_in_pseudophysmap(struct context *ctx, xen_pfn_t mfn)
+{
+    return ( (mfn <= ctx->x86_pv.max_mfn) &&
+             (mfn_to_pfn(ctx, mfn) <= ctx->x86_pv.max_pfn) &&
+             (ctx->ops.pfn_to_gfn(ctx, mfn_to_pfn(ctx, mfn)) == mfn) );
+}
+
+void pseudophysmap_walk(struct context *ctx, xen_pfn_t mfn)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t pfn = ~0UL;
+
+    ERROR("mfn %#lx, max %#lx", mfn, ctx->x86_pv.max_mfn);
+
+    if ( (mfn != ~0UL) && (mfn <= ctx->x86_pv.max_mfn) )
+    {
+        pfn = ctx->x86_pv.m2p[mfn];
+        ERROR("  m2p[%#lx] = %#lx, max_pfn %#lx",
+              mfn, pfn, ctx->x86_pv.max_pfn);
+    }
+
+    if ( (pfn != ~0UL) && (pfn <= ctx->x86_pv.max_pfn) )
+        ERROR("  p2m[%#lx] = %#lx",
+              pfn, ctx->ops.pfn_to_gfn(ctx, pfn));
+}
+
+xen_pfn_t cr3_to_mfn(struct context *ctx, uint64_t cr3)
+{
+    if ( ctx->x86_pv.width == 8 )
+        return cr3 >> 12;
+    else
+        return (((uint32_t)cr3 >> 12) | ((uint32_t)cr3 << 20));
+}
+
+uint64_t mfn_to_cr3(struct context *ctx, xen_pfn_t mfn)
+{
+    if ( ctx->x86_pv.width == 8 )
+        return ((uint64_t)mfn) << 12;
+    else
+        return (((uint32_t)mfn << 12) | ((uint32_t)mfn >> 20));
+}
+
+int x86_pv_domain_info(struct context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int guest_width, guest_levels, fpp;
+    int max_pfn;
+
+    /* Get the domain width. */
+    if ( xc_domain_get_guest_width(xch, ctx->domid, &guest_width) )
+    {
+        PERROR("Unable to determine dom%d's width", ctx->domid);
+        return -1;
+    }
+    else if ( guest_width == 4 )
+        guest_levels = 3;
+    else if ( guest_width == 8 )
+        guest_levels = 4;
+    else
+    {
Expected 32 or 64", guest_width); + return -1; + } + ctx->x86_pv.width = guest_width; + ctx->x86_pv.levels = guest_levels; + ctx->x86_pv.fpp = fpp = PAGE_SIZE / ctx->x86_pv.width; + + DPRINTF("%d bits, %d levels", guest_width * 8, guest_levels); + + /* Get the domains maximum pfn */ + max_pfn = xc_domain_maximum_gpfn(xch, ctx->domid); + if ( max_pfn < 0 ) + { + PERROR("Unable to obtain guests max pfn"); + return -1; + } + else if ( max_pfn >= ~XEN_DOMCTL_PFINFO_LTAB_MASK ) + { + errno = E2BIG; + PERROR("Cannot save a guest this large %#x"); + return -1; + } + else if ( max_pfn > 0 ) + { + ctx->x86_pv.max_pfn = max_pfn; + ctx->x86_pv.p2m_frames = (ctx->x86_pv.max_pfn + fpp) / fpp; + + DPRINTF("max_pfn %#x, p2m_frames %d", max_pfn, ctx->x86_pv.p2m_frames); + } + + return 0; +} + +int x86_pv_map_m2p(struct context *ctx) +{ + xc_interface *xch = ctx->xch; + long max_page = xc_maximum_ram_page(xch); + unsigned long m2p_chunks, m2p_size; + privcmd_mmap_entry_t *entries = NULL; + xen_pfn_t *extents_start = NULL; + int rc = -1, i; + + if ( max_page < 0 ) + { + PERROR("Failed to get maximum ram page"); + goto err; + } + + ctx->x86_pv.max_mfn = max_page; + m2p_size = M2P_SIZE(ctx->x86_pv.max_mfn); + m2p_chunks = M2P_CHUNKS(ctx->x86_pv.max_mfn); + + extents_start = malloc(m2p_chunks * sizeof(xen_pfn_t)); + if ( !extents_start ) + { + ERROR("Unable to allocate %zu bytes for m2p mfns", + m2p_chunks * sizeof(xen_pfn_t)); + goto err; + } + + if ( xc_machphys_mfn_list(xch, m2p_chunks, extents_start) ) + { + PERROR("Failed to get m2p mfn list"); + goto err; + } + + entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t)); + if ( !entries ) + { + ERROR("Unable to allocate %zu bytes for m2p mapping mfns", + m2p_chunks * sizeof(privcmd_mmap_entry_t)); + goto err; + } + + for ( i = 0; i < m2p_chunks; ++i ) + entries[i].mfn = extents_start[i]; + + ctx->x86_pv.m2p = xc_map_foreign_ranges( + xch, DOMID_XEN, m2p_size, PROT_READ, + M2P_CHUNK_SIZE, entries, m2p_chunks); + + if ( !ctx->x86_pv.m2p ) + { + PERROR("Failed to mmap m2p ranges"); + goto err; + } + + ctx->x86_pv.nr_m2p_frames = (M2P_CHUNK_SIZE >> PAGE_SHIFT) * m2p_chunks; + +#ifdef __i386__ + /* 32 bit toolstacks automatically get the compat m2p */ + ctx->x86_pv.compat_m2p_mfn0 = entries[0].mfn; +#else + /* 64 bit toolstacks need to ask Xen specially for it */ + { + struct xen_machphys_mfn_list xmml = { + .max_extents = 1, + .extent_start = { &ctx->x86_pv.compat_m2p_mfn0 } + }; + + rc = do_memory_op(xch, XENMEM_machphys_compat_mfn_list, + &xmml, sizeof xmml); + if ( rc || xmml.nr_extents != 1 ) + { + PERROR("Failed to get compat mfn list from Xen"); + rc = -1; + goto err; + } + } +#endif + + /* All Done */ + rc = 0; + DPRINTF("max_mfn %#lx", ctx->x86_pv.max_mfn); + +err: + free(entries); + free(extents_start); + + return rc; +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/tools/libxc/saverestore/common_x86_pv.h b/tools/libxc/saverestore/common_x86_pv.h new file mode 100644 index 0000000..c7315b6 --- /dev/null +++ b/tools/libxc/saverestore/common_x86_pv.h @@ -0,0 +1,104 @@ +#ifndef __COMMON_X86_PV_H +#define __COMMON_X86_PV_H + +#include "common_x86.h" + +/* + * Convert an mfn to a pfn, given Xens m2p table. + * + * Caller must ensure that the requested mfn is in range. + */ +xen_pfn_t mfn_to_pfn(struct context *ctx, xen_pfn_t mfn); + +/* + * Convert a pfn to an mfn, given the guests p2m table. 
diff --git a/tools/libxc/saverestore/common_x86_pv.h b/tools/libxc/saverestore/common_x86_pv.h
new file mode 100644
index 0000000..c7315b6
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86_pv.h
@@ -0,0 +1,104 @@
+#ifndef __COMMON_X86_PV_H
+#define __COMMON_X86_PV_H
+
+#include "common_x86.h"
+
+/*
+ * Convert an mfn to a pfn, given Xen's m2p table.
+ *
+ * Caller must ensure that the requested mfn is in range.
+ */
+xen_pfn_t mfn_to_pfn(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Convert a pfn to an mfn, given the guest's p2m table.
+ *
+ * Caller must ensure that the requested pfn is in range.
+ */
+xen_pfn_t pfn_to_mfn(struct context *ctx, xen_pfn_t pfn);
+
+/*
+ * Set a mapping in the p2m table.
+ *
+ * Caller must ensure that the requested pfn is in range.
+ */
+void set_p2m(struct context *ctx, xen_pfn_t pfn, xen_pfn_t mfn);
+
+/*
+ * Query whether a particular mfn is valid in the physmap of a guest.
+ */
+bool mfn_in_pseudophysmap(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Debug a particular mfn by walking the p2m and m2p.
+ */
+void pseudophysmap_walk(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Convert a PV cr3 field to an mfn.
+ */
+xen_pfn_t cr3_to_mfn(struct context *ctx, uint64_t cr3);
+
+/*
+ * Convert an mfn to a PV cr3 field.
+ */
+uint64_t mfn_to_cr3(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Extract an MFN from a Pagetable Entry.
+ */
+static inline xen_pfn_t pte_to_frame(struct context *ctx, uint64_t pte)
+{
+    if ( ctx->x86_pv.width == 8 )
+        return (pte >> PAGE_SHIFT) & ((1ULL << (52 - PAGE_SHIFT)) - 1);
+    else
+        return (pte >> PAGE_SHIFT) & ((1ULL << (44 - PAGE_SHIFT)) - 1);
+}
+
+static inline void update_pte(struct context *ctx, uint64_t *pte,
+                              xen_pfn_t pfn)
+{
+    if ( ctx->x86_pv.width == 8 )
+        *pte &= ~(((1ULL << (52 - PAGE_SHIFT)) - 1) << PAGE_SHIFT);
+    else
+        *pte &= ~(((1ULL << (44 - PAGE_SHIFT)) - 1) << PAGE_SHIFT);
+
+    *pte |= (uint64_t)pfn << PAGE_SHIFT;
+}
+
+/*
+ * Get current domain information.
+ *
+ * Fills ctx->x86_pv
+ * - .width
+ * - .levels
+ * - .fpp
+ * - .p2m_frames
+ *
+ * Used by the save side to create the X86_PV_INFO record, and by the restore
+ * side to verify the incoming stream.
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+int x86_pv_domain_info(struct context *ctx);
+
+/*
+ * Maps the Xen M2P.
+ *
+ * Fills ctx->x86_pv
+ * - .max_mfn
+ * - .m2p
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+int x86_pv_map_m2p(struct context *ctx);
+
+#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
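pte_to_frame() and update_pte() are the two halves of pagetable rewriting:
the saver swaps each present PTE's mfn for its pfn (normalisation) and the
restorer swaps it back (localisation), both preserving the flag bits.  A
standalone sketch of the 64bit masking (a PAGE_SHIFT of 12 assumed; values
arbitrary):

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12
    /* 64bit PV: the frame field occupies pte bits [51:12]. */
    #define FRAME_MASK ((((uint64_t)1 << (52 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)

    int main(void)
    {
        uint64_t pte = (0xabcdeULL << PAGE_SHIFT) | 0x67; /* frame | flags */

        /* Extract the frame, as pte_to_frame() does. */
        assert(((pte & FRAME_MASK) >> PAGE_SHIFT) == 0xabcde);

        /* Replace the frame, as update_pte() does, keeping the flags. */
        pte = (pte & ~FRAME_MASK) | (0x12345ULL << PAGE_SHIFT);
        assert(pte == ((0x12345ULL << PAGE_SHIFT) | 0x67));
        return 0;
    }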
diff --git a/tools/libxc/saverestore/restore.c b/tools/libxc/saverestore/restore.c
index 6624baa..5834d38 100644
--- a/tools/libxc/saverestore/restore.c
+++ b/tools/libxc/saverestore/restore.c
@@ -12,6 +12,294 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
     return -1;
 }
 
+static bool pfn_is_populated(struct context *ctx, xen_pfn_t pfn)
+{
+    if ( !ctx->restore.populated_pfns || pfn > ctx->restore.max_populated_pfn )
+        return false;
+    return test_bit(pfn, ctx->restore.populated_pfns);
+}
+
+static int pfn_set_populated(struct context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+
+    if ( !ctx->restore.populated_pfns || pfn > ctx->restore.max_populated_pfn )
+    {
+        unsigned long new_max_pfn = ((pfn + 1024) & ~1023) - 1;
+        size_t old_sz, new_sz;
+        unsigned long *p;
+
+        old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1);
+        new_sz = bitmap_size(new_max_pfn + 1);
+
+        p = realloc(ctx->restore.populated_pfns, new_sz);
+        if ( !p )
+        {
+            PERROR("Failed to realloc populated bitmap");
+            return -1;
+        }
+
+        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);
+
+        ctx->restore.populated_pfns = p;
+        ctx->restore.max_populated_pfn = new_max_pfn;
+    }
+
+    set_bit(pfn, ctx->restore.populated_pfns);
+
+    return 0;
+}
+
+int populate_pfns(struct context *ctx, unsigned count,
+                  const xen_pfn_t *original_pfns, const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof *mfns),
+        *pfns = malloc(count * sizeof *pfns);
+    unsigned i, nr_pfns = 0;
+    int rc = -1;
+
+    if ( !mfns || !pfns )
+    {
+        ERROR("Failed to allocate %zu bytes for populating the physmap",
+              2 * count * sizeof *mfns);
+        goto err;
+    }
+
+    for ( i = 0; i < count; ++i )
+    {
+        if ( types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+             types[i] != XEN_DOMCTL_PFINFO_BROKEN &&
+             !pfn_is_populated(ctx, original_pfns[i]) )
+        {
+            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
+            ++nr_pfns;
+        }
+    }
+
+    if ( nr_pfns )
+    {
+        rc = xc_domain_populate_physmap_exact(xch, ctx->domid, nr_pfns,
+                                              0, 0, mfns);
+        if ( rc )
+        {
+            PERROR("Failed to populate physmap");
+            goto err;
+        }
+
+        for ( i = 0; i < nr_pfns; ++i )
+        {
+            rc = pfn_set_populated(ctx, pfns[i]);
+            if ( rc )
+                goto err;
+            ctx->ops.set_gfn(ctx, pfns[i], mfns[i]);
+        }
+    }
+
+    rc = 0;
+
+ err:
+    free(pfns);
+    free(mfns);
+
+    return rc;
+}
+
+static int process_page_data(struct context *ctx, unsigned count,
+                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof *mfns);
+    int *map_errs = malloc(count * sizeof *map_errs);
+    int rc = -1;
+    void *mapping = NULL, *guest_page = NULL;
+    unsigned i, /* i indexes the pfns from the record */
+        j,      /* j indexes the subset of pfns we decide to map */
+        nr_pages;
+
+    if ( !mfns || !map_errs )
+    {
+        ERROR("Failed to allocate %zu bytes to process page data",
+              count * (sizeof *mfns + sizeof *map_errs));
+        goto err;
+    }
+
+    rc = populate_pfns(ctx, count, pfns, types);
+    if ( rc )
+    {
+        ERROR("Failed to populate pfns for batch of %u pages", count);
+        goto err;
+    }
+    rc = -1;
+
+    for ( i = 0, nr_pages = 0; i < count; ++i )
+    {
+        ctx->ops.set_page_type(ctx, pfns[i], types[i]);
+
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_NOTAB:
+
+        case XEN_DOMCTL_PFINFO_L1TAB:
+        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L4TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+            mfns[nr_pages++] = ctx->ops.pfn_to_gfn(ctx, pfns[i]);
+            break;
+        }
+    }
+
+    if ( nr_pages > 0 )
+    {
+        mapping = guest_page = xc_map_foreign_bulk(
+            xch, ctx->domid, PROT_READ | PROT_WRITE,
+            mfns, map_errs, nr_pages);
+        if ( !mapping )
+        {
+            PERROR("Unable to map %u mfns for %u pages of data",
+                   nr_pages, count);
+            goto err;
+        }
+    }
+
+    for ( i = 0, j = 0; i < count; ++i )
+    {
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_XTAB:
+        case XEN_DOMCTL_PFINFO_BROKEN:
+            /* Nothing at all to do. */
+        case XEN_DOMCTL_PFINFO_XALLOC:
+            /* Nothing further to do. */
+            continue;
+        }
+
+        if ( map_errs[j] )
+        {
+            ERROR("Mapping pfn %lx (mfn %lx, type %#"PRIx32") failed with %d",
+                  pfns[i], mfns[j], types[i], map_errs[j]);
+            goto err;
+        }
+
+        memcpy(guest_page, page_data, PAGE_SIZE);
+
+        /* Undo the page normalisation done by the saver. */
+        rc = ctx->ops.localise_page(ctx, types[i], guest_page);
+        if ( rc )
+        {
+            DPRINTF("Failed to localise");
+            goto err;
+        }
+
+        ++j;
+        guest_page += PAGE_SIZE;
+        page_data += PAGE_SIZE;
+    }
+
+    rc = 0;
+
+ err:
+    if ( mapping )
+        munmap(mapping, nr_pages * PAGE_SIZE);
+
+    free(map_errs);
+    free(mfns);
+
+    return rc;
+}
+
+int handle_page_data(struct context *ctx, struct record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct rec_page_data_header *pages = rec->data;
+    unsigned i, pages_of_data = 0;
+    int rc = -1;
+
+    xen_pfn_t *pfns = NULL, pfn;
+    uint32_t *types = NULL, type;
+
+    if ( rec->length < sizeof *pages )
+    {
+        ERROR("PAGE_DATA record truncated: length %"PRIu32", min %zu",
+              rec->length, sizeof *pages);
+        goto err;
+    }
+    else if ( pages->count < 1 )
+    {
+        ERROR("Expected at least 1 pfn in PAGE_DATA record");
+        goto err;
+    }
+    else if ( rec->length < sizeof *pages + (pages->count * sizeof (uint64_t)) )
+    {
+        ERROR("PAGE_DATA record (length %"PRIu32") too short to contain %"
+              PRIu32" pfns worth of information", rec->length, pages->count);
+        goto err;
+    }
+
+    pfns = malloc(pages->count * sizeof *pfns);
+    types = malloc(pages->count * sizeof *types);
+    if ( !pfns || !types )
+    {
+        ERROR("Unable to allocate enough memory for %"PRIu32" pfns",
+              pages->count);
+        goto err;
+    }
+
+    for ( i = 0; i < pages->count; ++i )
+    {
+        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
+        if ( !ctx->ops.pfn_is_valid(ctx, pfn) )
+        {
+            ERROR("pfn %#lx (index %u) outside domain maximum", pfn, i);
+            goto err;
+        }
+
+        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
+        if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
+             ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
+        {
+            ERROR("Invalid type %#"PRIx32" for pfn %#lx (index %u)",
+                  type, pfn, i);
+            goto err;
+        }
+        else if ( type < XEN_DOMCTL_PFINFO_BROKEN )
+            /* NOTAB and all L1 through L4 tables (including pinned) should
+             * have a page worth of data in the record. */
+            pages_of_data++;
+
+        pfns[i] = pfn;
+        types[i] = type;
+    }
+
+    if ( rec->length != (sizeof *pages +
+                         (sizeof (uint64_t) * pages->count) +
+                         (PAGE_SIZE * pages_of_data)) )
+    {
+        ERROR("PAGE_DATA record wrong size: length %"PRIu32", expected "
+              "%zu + %zu + %zu", rec->length, sizeof *pages,
+              (sizeof (uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
+        goto err;
+    }
+
+    rc = process_page_data(ctx, pages->count, pfns, types,
+                           &pages->pfn[pages->count]);
+ err:
+    free(types);
+    free(pfns);
+
+    return rc;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxc/saverestore/save.c b/tools/libxc/saverestore/save.c
index c013e62..e842e6c 100644
--- a/tools/libxc/saverestore/save.c
+++ b/tools/libxc/saverestore/save.c
@@ -1,5 +1,47 @@
+#include <arpa/inet.h>
+
 #include "common.h"
 
+int write_headers(struct context *ctx, uint16_t guest_type)
+{
+    xc_interface *xch = ctx->xch;
+    int32_t xen_version = xc_version(xch, XENVER_version, NULL);
+    struct ihdr ihdr =
+    {
+        .marker  = IHDR_MARKER,
+        .id      = htonl(IHDR_ID),
+        .version = htonl(IHDR_VERSION),
+        .options = htons(IHDR_OPT_LITTLE_ENDIAN),
+    };
+    struct dhdr dhdr =
+    {
+        .type       = guest_type,
+        .page_shift = 12,
+        .xen_major  = (xen_version >> 16) & 0xffff,
+        .xen_minor  = (xen_version) & 0xffff,
+    };
+
+    if ( xen_version < 0 )
+    {
+        PERROR("Unable to obtain Xen Version");
+        return -1;
+    }
+
+    if ( write_exact(ctx->fd, &ihdr, sizeof ihdr) )
+    {
+        PERROR("Unable to write Image Header to stream");
+        return -1;
+    }
+
+    if ( write_exact(ctx->fd, &dhdr, sizeof dhdr) )
+    {
+        PERROR("Unable to write Domain Header to stream");
+        return -1;
+    }
+
+    return 0;
+}
+
 int xc_domain_save2(xc_interface *xch, int io_fd, uint32_t dom,
                     uint32_t max_iters, uint32_t max_factor, uint32_t flags,
                     struct save_callbacks* callbacks, int hvm,
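The ihdr id/version/options fields are deliberately written with
htonl()/htons() so a restorer can detect a foreign-endian stream from the
fixed-endian image header, and page_shift is hardcoded to 12 until non-4k
guests exist.  On the save side write_headers() is expected to be the first
thing emitted (sketch only; DHDR_TYPE_x86_pv is the domain type constant
assumed from the stream format patch earlier in the series):

    /* Sketch: stream prologue for a PV domain. */
    if ( write_headers(ctx, DHDR_TYPE_x86_pv) )
        return -1;

    /* ...followed by an X86_PV_INFO record, PAGE_DATA records, etc. */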
-- 
1.7.10.4