[Xen-devel] [PATCH v4 5/9] tools/libxc: common code
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx>
Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
tools/libxc/saverestore/common.c | 87 ++++++
tools/libxc/saverestore/common.h | 172 ++++++++++++
tools/libxc/saverestore/common_x86.c | 54 ++++
tools/libxc/saverestore/common_x86.h | 21 ++
tools/libxc/saverestore/common_x86_hvm.c | 53 ++++
tools/libxc/saverestore/common_x86_pv.c | 431 ++++++++++++++++++++++++++++++
tools/libxc/saverestore/common_x86_pv.h | 104 +++++++
tools/libxc/saverestore/restore.c | 288 ++++++++++++++++++++
tools/libxc/saverestore/save.c | 42 +++
9 files changed, 1252 insertions(+)
create mode 100644 tools/libxc/saverestore/common_x86.c
create mode 100644 tools/libxc/saverestore/common_x86.h
create mode 100644 tools/libxc/saverestore/common_x86_hvm.c
create mode 100644 tools/libxc/saverestore/common_x86_pv.c
create mode 100644 tools/libxc/saverestore/common_x86_pv.h
diff --git a/tools/libxc/saverestore/common.c b/tools/libxc/saverestore/common.c
index de2e727..b159c4c 100644
--- a/tools/libxc/saverestore/common.c
+++ b/tools/libxc/saverestore/common.c
@@ -1,3 +1,5 @@
+#include <assert.h>
+
#include "common.h"
static const char *dhdr_types[] =
@@ -52,6 +54,91 @@ const char *rec_type_to_str(uint32_t type)
return "Reserved";
}
+int write_split_record(struct context *ctx, struct record *rec,
+ void *buf, size_t sz)
+{
+ static const char zeroes[7] = { 0 };
+ xc_interface *xch = ctx->xch;
+ uint32_t combined_length = rec->length + sz;
+ size_t record_length = (combined_length + 7) & ~7UL;
+
+ if ( record_length > REC_LENGTH_MAX )
+ {
+ ERROR("Record (0x%08"PRIx32", %s) length 0x%"PRIx32
+ " exceeds max (0x%"PRIx32")", rec->type,
+ rec_type_to_str(rec->type), rec->length, REC_LENGTH_MAX);
+ return -1;
+ }
+
+ if ( rec->length )
+ assert(rec->data);
+ if ( sz )
+ assert(buf);
+
+ if ( write_exact(ctx->fd, &rec->type, sizeof rec->type) ||
+ write_exact(ctx->fd, &combined_length, sizeof rec->length) ||
+ (rec->length && write_exact(ctx->fd, rec->data, rec->length)) ||
+ (sz && write_exact(ctx->fd, buf, sz)) ||
+ write_exact(ctx->fd, zeroes, record_length - combined_length) )
+ {
+ PERROR("Unable to write record to stream");
+ return -1;
+ }
+
+ return 0;
+}
+
+int read_record(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rhdr rhdr;
+ size_t datasz;
+
+ if ( read_exact(ctx->fd, &rhdr, sizeof rhdr) )
+ {
+ PERROR("Failed to read Record Header from stream");
+ return -1;
+ }
+ else if ( rhdr.length > REC_LENGTH_MAX )
+ {
+ ERROR("Record (0x%08"PRIx32", %s) length 0x%"PRIx32
+ " exceeds max (0x%"PRIx32")",
+ rhdr.type, rec_type_to_str(rhdr.type),
+ rhdr.length, REC_LENGTH_MAX);
+ return -1;
+ }
+
+ datasz = (rhdr.length + 7) & ~7U;
+
+ if ( datasz )
+ {
+ rec->data = malloc(datasz);
+
+ if ( !rec->data )
+ {
+ ERROR("Unable to allocate %zu bytes for record data
(0x%08"PRIx32", %s)",
+ datasz, rhdr.type, rec_type_to_str(rhdr.type));
+ return -1;
+ }
+
+ if ( read_exact(ctx->fd, rec->data, datasz) )
+ {
+ free(rec->data);
+ rec->data = NULL;
+ PERROR("Failed to read %zu bytes of data for record
(0x%08"PRIx32", %s)",
+ datasz, rhdr.type, rec_type_to_str(rhdr.type));
+ return -1;
+ }
+ }
+ else
+ rec->data = NULL;
+
+ rec->type = rhdr.type;
+ rec->length = rhdr.length;
+
+ return 0;
+}
+
/*
* Local variables:
* mode: C
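
An illustrative caller of the helpers above (a sketch, not part of the
patch): combining a fixed header with a separate blob, using a made-up
record type number. The 8-byte header plus 13-byte blob gives a combined
length of 21, which write_split_record() pads to 24 on the wire.

/* Sketch only: demonstrates write_split_record()'s split-buffer interface.
 * 0xf0000001 is an invented record type for this example. */
static int send_example_record(struct context *ctx)
{
    uint64_t hdr = 0x1122334455667788ULL;  /* record-specific header */
    char blob[13] = "hello, world";        /* odd length: exercises padding */
    struct record rec =
    {
        .type   = 0xf0000001,              /* hypothetical type number */
        .length = sizeof hdr,
        .data   = &hdr,
    };

    /* On the wire: type, combined length (21), hdr, blob, 3 zero bytes. */
    return write_split_record(ctx, &rec, blob, sizeof blob);
}
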
diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h
index fff0a39..a35eda7 100644
--- a/tools/libxc/saverestore/common.h
+++ b/tools/libxc/saverestore/common.h
@@ -1,7 +1,20 @@
#ifndef __COMMON__H
#define __COMMON__H
+#include <stdbool.h>
+
+// Hack out junk from the namespace
+#define mfn_to_pfn __UNUSED_mfn_to_pfn
+#define pfn_to_mfn __UNUSED_pfn_to_mfn
+
#include "../xg_private.h"
+#include "../xg_save_restore.h"
+#include "../xc_dom.h"
+#include "../xc_bitops.h"
+
+#undef mfn_to_pfn
+#undef pfn_to_mfn
+
#include "stream_format.h"
@@ -11,6 +24,165 @@
const char *dhdr_type_to_str(uint32_t type);
const char *rec_type_to_str(uint32_t type);
+struct context;
+
+struct save_restore_ops
+{
+ bool (*pfn_is_valid)(struct context *ctx, xen_pfn_t pfn);
+ xen_pfn_t (*pfn_to_gfn)(struct context *ctx, xen_pfn_t pfn);
+ void (*set_gfn)(struct context *ctx, xen_pfn_t pfn, xen_pfn_t gfn);
+ void (*set_page_type)(struct context *ctx, xen_pfn_t pfn, xen_pfn_t type);
+ int (*normalise_page)(struct context *ctx, xen_pfn_t type, void **page);
+ int (*localise_page)(struct context *ctx, uint32_t type, void *page);
+};
+
+struct context
+{
+ xc_interface *xch;
+ uint32_t domid;
+ int fd;
+
+ xc_dominfo_t dominfo;
+
+ struct save_restore_ops ops;
+
+ union
+ {
+ struct
+ {
+ /* From Image Header */
+ uint32_t format_version;
+
+ /* From Domain Header */
+ uint32_t guest_type;
+ uint32_t guest_page_size;
+
+ unsigned long xenstore_mfn, console_mfn;
+ unsigned int xenstore_evtchn, console_evtchn;
+ domid_t xenstore_domid, console_domid;
+
+ struct restore_callbacks *callbacks;
+
+ /* Bitmap of currently populated PFNs during restore. */
+ unsigned long *populated_pfns;
+ unsigned int max_populated_pfn;
+ } restore;
+
+ struct
+ {
+ unsigned long p2m_size;
+
+ struct save_callbacks *callbacks;
+ } save;
+ };
+
+ xen_pfn_t *batch_pfns;
+ unsigned nr_batch_pfns;
+ unsigned long *deferred_pages;
+
+ union
+ {
+ struct
+ {
+ /* 4 or 8; 32 or 64 bit domain */
+ unsigned int width;
+ /* 3 or 4 pagetable levels */
+ unsigned int levels;
+
+
+ /* Maximum Xen frame */
+ unsigned long max_mfn;
+ /* Read-only machine to phys map */
+ xen_pfn_t *m2p;
+ /* first mfn of the compat m2p (Only needed for 32bit PV guests) */
+ xen_pfn_t compat_m2p_mfn0;
+ /* Number of m2p frames mapped */
+ unsigned long nr_m2p_frames;
+
+
+ /* Maximum guest frame */
+ unsigned long max_pfn;
+ /* Frames per page in guest p2m */
+ unsigned int fpp;
+
+ /* Number of frames making up the p2m */
+ unsigned int p2m_frames;
+ /* Guest's phys to machine map. Mapped read-only (save) or
+ * allocated locally (restore). Uses guest unsigned longs. */
+ void *p2m;
+ /* The guest pfns containing the p2m leaves */
+ xen_pfn_t *p2m_pfns;
+ /* Types for each page */
+ uint32_t *pfn_types;
+
+            /* Read-only mapping of the guest's shared info page */
+ shared_info_any_t *shinfo;
+ } x86_pv;
+ };
+};
+
+/*
+ * Write the image and domain headers to the stream.
+ * (to eventually make static in save.c)
+ */
+int write_headers(struct context *ctx, uint16_t guest_type);
+
+extern struct save_restore_ops save_restore_ops_x86_pv;
+extern struct save_restore_ops save_restore_ops_x86_hvm;
+
+struct record
+{
+ uint32_t type;
+ uint32_t length;
+ void *data;
+};
+
+/*
+ * Writes a split record to the stream, applying correct padding where
+ * appropriate. It is common when sending records containing blobs from Xen
+ * that the header and blob data are separate. This function accepts a second
+ * buffer and length, and will merge it with the main record when sending.
+ *
+ * Records with a non-zero length must provide a valid data field; records
+ * with a 0 length shall have their data field ignored.
+ *
+ * Returns 0 on success and non-0 on failure.
+ */
+int write_split_record(struct context *ctx, struct record *rec,
+                       void *buf, size_t sz);
+
+/*
+ * Writes a record to the stream, applying correct padding where appropriate.
+ * Records with a non-zero length must provide a valid data field; records
+ * with a 0 length shall have their data field ignored.
+ *
+ * Returns 0 on success and non-0 on failure.
+ */
+static inline int write_record(struct context *ctx, struct record *rec)
+{
+ return write_split_record(ctx, rec, NULL, 0);
+}
+
+/*
+ * Reads a record from the stream, and fills in the record structure.
+ *
+ * Returns 0 on success and non-0 on failure.
+ *
+ * On success, the record's type and size shall be valid.
+ * - If size is 0, data shall be NULL.
+ * - If size is non-0, data shall be a buffer allocated by malloc() which must
+ * be passed to free() by the caller.
+ *
+ * On failure, the contents of the record structure are undefined.
+ */
+int read_record(struct context *ctx, struct record *rec);
+
+int write_page_data_and_pause(struct context *ctx);
+
+int handle_page_data(struct context *ctx, struct record *rec);
+
+int populate_pfns(struct context *ctx, unsigned count,
+ const xen_pfn_t *original_pfns, const uint32_t *types);
+
#endif
/*
* Local variables:
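
A sketch of how a restore-side consumer is expected to drive read_record():
loop, dispatch on the record type, and free the malloc()ed data buffer.
REC_TYPE_page_data and REC_TYPE_end are assumed here to be record-type
constants from stream_format.h; the real restore loop appears later in the
series.

/* Sketch only: a minimal record-processing loop, assuming REC_TYPE_end
 * and REC_TYPE_page_data constants exist in stream_format.h. */
static int process_records(struct context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct record rec;
    int rc;

    do
    {
        if ( read_record(ctx, &rec) )
            return -1;

        switch ( rec.type )
        {
        case REC_TYPE_page_data:
            rc = handle_page_data(ctx, &rec);
            break;
        case REC_TYPE_end:
            rc = 0;
            break;
        default:
            DPRINTF("Skipping unknown record type %#"PRIx32, rec.type);
            rc = 0;
            break;
        }

        free(rec.data);          /* read_record() malloc()ed this */
        if ( rc )
            return rc;
    } while ( rec.type != REC_TYPE_end );

    return 0;
}
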
diff --git a/tools/libxc/saverestore/common_x86.c b/tools/libxc/saverestore/common_x86.c
new file mode 100644
index 0000000..0a3d555
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86.c
@@ -0,0 +1,54 @@
+#include "common_x86.h"
+
+int write_tsc_info(struct context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_tsc_info tsc = { 0 };
+ struct record rec =
+ {
+ .type = REC_TYPE_tsc_info,
+ .length = sizeof tsc,
+ .data = &tsc
+ };
+
+ if ( xc_domain_get_tsc_info(xch, ctx->domid, &tsc.mode,
+ &tsc.nsec, &tsc.khz, &tsc.incarnation) < 0 )
+ {
+ PERROR("Unable to obtain TSC information");
+ return -1;
+ }
+
+ return write_record(ctx, &rec);
+}
+
+int handle_tsc_info(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_tsc_info *tsc = rec->data;
+
+ if ( rec->length != sizeof *tsc )
+ {
+ ERROR("TSC_INFO record wrong size: length %"PRIu32", expected %zu",
+ rec->length, sizeof *tsc);
+ return -1;
+ }
+
+ if ( xc_domain_set_tsc_info(xch, ctx->domid, tsc->mode,
+ tsc->nsec, tsc->khz, tsc->incarnation) )
+ {
+ PERROR("Unable to set TSC information");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxc/saverestore/common_x86.h b/tools/libxc/saverestore/common_x86.h
new file mode 100644
index 0000000..429532a
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86.h
@@ -0,0 +1,21 @@
+#ifndef __COMMON_X86__H
+#define __COMMON_X86__H
+
+#include "common.h"
+
+/* Obtains and writes domain TSC information to the stream */
+int write_tsc_info(struct context *ctx);
+
+/* Parses domain TSC information from the stream */
+int handle_tsc_info(struct context *ctx, struct record *rec);
+
+#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxc/saverestore/common_x86_hvm.c b/tools/libxc/saverestore/common_x86_hvm.c
new file mode 100644
index 0000000..0b9aac2
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86_hvm.c
@@ -0,0 +1,53 @@
+#include "common.h"
+
+static bool x86_hvm_pfn_is_valid(struct context *ctx, xen_pfn_t pfn)
+{
+ return true;
+}
+
+static xen_pfn_t x86_hvm_pfn_to_gfn(struct context *ctx, xen_pfn_t pfn)
+{
+ return pfn;
+}
+
+static void x86_hvm_set_gfn(struct context *ctx, xen_pfn_t pfn,
+ xen_pfn_t gfn)
+{
+ /* no op */
+}
+
+static void x86_hvm_set_page_type(struct context *ctx, xen_pfn_t pfn,
+                                  xen_pfn_t type)
+{
+ /* no-op */
+}
+
+static int x86_hvm_normalise_page(struct context *ctx, xen_pfn_t type,
+                                  void **page)
+{
+ /* no-op */
+ return 0;
+}
+
+static int x86_hvm_localise_page(struct context *ctx, uint32_t type,
+                                 void *page)
+{
+ /* no-op */
+ return 0;
+}
+
+struct save_restore_ops save_restore_ops_x86_hvm = {
+ .pfn_is_valid = x86_hvm_pfn_is_valid,
+ .pfn_to_gfn = x86_hvm_pfn_to_gfn,
+ .set_gfn = x86_hvm_set_gfn,
+ .set_page_type = x86_hvm_set_page_type,
+ .normalise_page = x86_hvm_normalise_page,
+ .localise_page = x86_hvm_localise_page
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxc/saverestore/common_x86_pv.c b/tools/libxc/saverestore/common_x86_pv.c
new file mode 100644
index 0000000..35bce27
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86_pv.c
@@ -0,0 +1,431 @@
+#include <assert.h>
+
+#include "common_x86_pv.h"
+
+xen_pfn_t mfn_to_pfn(struct context *ctx, xen_pfn_t mfn)
+{
+ assert(mfn <= ctx->x86_pv.max_mfn);
+ return ctx->x86_pv.m2p[mfn];
+}
+
+static bool x86_pv_pfn_is_valid(struct context *ctx, xen_pfn_t pfn)
+{
+ return pfn <= ctx->x86_pv.max_pfn;
+}
+
+static xen_pfn_t x86_pv_pfn_to_gfn(struct context *ctx, xen_pfn_t pfn)
+{
+ assert(pfn <= ctx->x86_pv.max_pfn);
+
+ if ( ctx->x86_pv.width == sizeof (uint64_t) )
+ /* 64 bit guest. Need to truncate their pfns for 32 bit toolstacks */
+ return ((uint64_t *)ctx->x86_pv.p2m)[pfn];
+ else
+ {
+        /* 32 bit guest. Need to expand INVALID_MFN for 64 bit toolstacks */
+ uint32_t mfn = ((uint32_t *)ctx->x86_pv.p2m)[pfn];
+
+ return mfn == ~0U ? INVALID_MFN : mfn;
+ }
+}
+
+static void x86_pv_set_page_type(struct context *ctx, xen_pfn_t pfn,
+                                 xen_pfn_t type)
+{
+ assert(pfn <= ctx->x86_pv.max_pfn);
+
+ ctx->x86_pv.pfn_types[pfn] = type;
+}
+
+static void x86_pv_set_gfn(struct context *ctx, xen_pfn_t pfn,
+ xen_pfn_t mfn)
+{
+ assert(pfn <= ctx->x86_pv.max_pfn);
+
+ if ( ctx->x86_pv.width == sizeof (uint64_t) )
+ /* 64 bit guest. Need to expand INVALID_MFN for 32 bit toolstacks */
+ ((uint64_t *)ctx->x86_pv.p2m)[pfn] = mfn == INVALID_MFN ? ~0ULL : mfn;
+ else
+        /* 32 bit guest. Can safely truncate INVALID_MFN for 64 bit
+         * toolstacks. */
+ ((uint32_t *)ctx->x86_pv.p2m)[pfn] = mfn;
+}
+
+static int normalise_pagetable(struct context *ctx, const uint64_t *src,
+ uint64_t *dst, unsigned long type)
+{
+ xc_interface *xch = ctx->xch;
+ uint64_t pte;
+    unsigned i, xen_first = -1, xen_last = -1; /* Indices of Xen mappings */
+
+ type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+ if ( ctx->x86_pv.levels == 4 )
+ {
+ /* 64bit guests only have Xen mappings in their L4 tables */
+ if ( type == XEN_DOMCTL_PFINFO_L4TAB )
+ {
+ xen_first = 256;
+ xen_last = 271;
+ }
+ }
+ else
+ {
+ switch ( type )
+ {
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ ERROR("??? Found L4 table for 32bit guest");
+ errno = EINVAL;
+ return -1;
+
+ case XEN_DOMCTL_PFINFO_L3TAB:
+            /* 32bit guests can only use the first 4 entries of their L3
+             * tables. All others are potentially used by Xen. */
+            xen_first = 4;
+            xen_last = 511;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ /* It is hard to spot Xen mappings in a 32bit guest's L2. Most
+ * are normal but only a few will have Xen mappings.
+ *
+             * 428 = (HYPERVISOR_VIRT_START_PAE >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff
+ *
+ * ...which is conveniently unavailable to us in a 64bit build.
+ */
+ if ( pte_to_frame(ctx, src[428]) == ctx->x86_pv.compat_m2p_mfn0 )
+ {
+ xen_first = 428;
+                xen_last = 511;
+ }
+ break;
+ }
+ }
+
+ for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
+ {
+ xen_pfn_t mfn, pfn;
+
+ pte = src[i];
+
+ /* Remove Xen mappings: Xen will reconstruct on the other side */
+ if ( i >= xen_first && i <= xen_last )
+ pte = 0;
+
+ if ( pte & _PAGE_PRESENT )
+ {
+ mfn = pte_to_frame(ctx, pte);
+
+ if ( pte & _PAGE_PSE )
+ {
+ ERROR("Cannot migrate superpage (L%lu[%u]: 0x%016"PRIx64")",
+ type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
+ errno = E2BIG;
+ return -1;
+ }
+
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ /* This is expected during the live part of migration given
+ * split pagetable updates, active grant mappings etc. The
+ * pagetable will need to be resent after pausing. It is
+ * however fatal if we have already paused the domain. */
+ if ( !ctx->dominfo.paused )
+ errno = EAGAIN;
+ else
+ {
+ ERROR("Bad MFN for L%lu[%u]",
+ type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i);
+ pseudophysmap_walk(ctx, mfn);
+ errno = ERANGE;
+ }
+ return -1;
+ }
+ else
+ pfn = mfn_to_pfn(ctx, mfn);
+
+ update_pte(ctx, &pte, pfn);
+ }
+
+ dst[i] = pte;
+ }
+
+ return 0;
+}
+
+static int x86_pv_normalise_page(struct context *ctx, xen_pfn_t type,
+ void **page)
+{
+ xc_interface *xch = ctx->xch;
+ void *local_page;
+ int rc;
+
+ type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+ if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
+ return 0;
+
+ local_page = malloc(PAGE_SIZE);
+ if ( !local_page )
+ {
+ ERROR("Unable to allocate scratch page");
+ rc = -1;
+ goto out;
+ }
+
+ rc = normalise_pagetable(ctx, *page, local_page, type);
+ *page = local_page;
+
+ out:
+ return rc;
+}
+
+static int x86_pv_localise_page(struct context *ctx, uint32_t type, void *page)
+{
+ xc_interface *xch = ctx->xch;
+ uint64_t *table = page;
+ uint64_t pte;
+ unsigned i;
+
+ type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+ /* Only page tables need localisation. */
+ if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
+ return 0;
+
+ for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
+ {
+ pte = table[i];
+
+ if ( pte & _PAGE_PRESENT )
+ {
+ xen_pfn_t mfn, pfn;
+
+ pfn = pte_to_frame(ctx, pte);
+ mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+
+ if ( mfn == INVALID_MFN )
+ {
+ if ( populate_pfns(ctx, 1, &pfn, &type) )
+ return -1;
+
+ mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+ }
+
+ if ( !mfn_in_pseudophysmap(ctx, mfn) )
+ {
+ ERROR("Bad MFN for L%lu[%u]",
+ type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i);
+ pseudophysmap_walk(ctx, mfn);
+ errno = ERANGE;
+ return -1;
+ }
+
+ update_pte(ctx, &pte, mfn);
+
+ table[i] = pte;
+ }
+ }
+
+ return 0;
+}
+
+struct save_restore_ops save_restore_ops_x86_pv = {
+ .pfn_is_valid = x86_pv_pfn_is_valid,
+ .pfn_to_gfn = x86_pv_pfn_to_gfn,
+ .set_page_type = x86_pv_set_page_type,
+ .set_gfn = x86_pv_set_gfn,
+ .normalise_page = x86_pv_normalise_page,
+ .localise_page = x86_pv_localise_page,
+};
+
+bool mfn_in_pseudophysmap(struct context *ctx, xen_pfn_t mfn)
+{
+ return ( (mfn <= ctx->x86_pv.max_mfn) &&
+ (mfn_to_pfn(ctx, mfn) <= ctx->x86_pv.max_pfn) &&
+             (ctx->ops.pfn_to_gfn(ctx, mfn_to_pfn(ctx, mfn)) == mfn) );
+}
+
+void pseudophysmap_walk(struct context *ctx, xen_pfn_t mfn)
+{
+ xc_interface *xch = ctx->xch;
+ xen_pfn_t pfn = ~0UL;
+
+ ERROR("mfn %#lx, max %#lx", mfn, ctx->x86_pv.max_mfn);
+
+ if ( (mfn != ~0UL) && (mfn <= ctx->x86_pv.max_mfn) )
+ {
+ pfn = ctx->x86_pv.m2p[mfn];
+ ERROR(" m2p[%#lx] = %#lx, max_pfn %#lx",
+ mfn, pfn, ctx->x86_pv.max_pfn);
+ }
+
+ if ( (pfn != ~0UL) && (pfn <= ctx->x86_pv.max_pfn) )
+ ERROR(" p2m[%#lx] = %#lx",
+ pfn, ctx->ops.pfn_to_gfn(ctx, pfn));
+}
+
+xen_pfn_t cr3_to_mfn(struct context *ctx, uint64_t cr3)
+{
+ if ( ctx->x86_pv.width == 8 )
+ return cr3 >> 12;
+ else
+ return (((uint32_t)cr3 >> 12) | ((uint32_t)cr3 << 20));
+}
+
+uint64_t mfn_to_cr3(struct context *ctx, xen_pfn_t mfn)
+{
+ if ( ctx->x86_pv.width == 8 )
+ return ((uint64_t)mfn) << 12;
+ else
+ return (((uint32_t)mfn << 12) | ((uint32_t)mfn >> 20));
+}
+
+int x86_pv_domain_info(struct context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ unsigned int guest_width, guest_levels, fpp;
+ int max_pfn;
+
+ /* Get the domain width */
+ if ( xc_domain_get_guest_width(xch, ctx->domid, &guest_width) )
+ {
+ PERROR("Unable to determine dom%d's width", ctx->domid);
+ return -1;
+ }
+ else if ( guest_width == 4 )
+ guest_levels = 3;
+ else if ( guest_width == 8 )
+ guest_levels = 4;
+ else
+ {
+ ERROR("Invalid guest width %d. Expected 32 or 64", guest_width);
+ return -1;
+ }
+ ctx->x86_pv.width = guest_width;
+ ctx->x86_pv.levels = guest_levels;
+ ctx->x86_pv.fpp = fpp = PAGE_SIZE / ctx->x86_pv.width;
+
+ DPRINTF("%d bits, %d levels", guest_width * 8, guest_levels);
+
+    /* Get the domain's maximum pfn */
+ max_pfn = xc_domain_maximum_gpfn(xch, ctx->domid);
+ if ( max_pfn < 0 )
+ {
+ PERROR("Unable to obtain guests max pfn");
+ return -1;
+ }
+ else if ( max_pfn >= ~XEN_DOMCTL_PFINFO_LTAB_MASK )
+ {
+ errno = E2BIG;
+ PERROR("Cannot save a guest this large %#x");
+ return -1;
+ }
+ else if ( max_pfn > 0 )
+ {
+ ctx->x86_pv.max_pfn = max_pfn;
+ ctx->x86_pv.p2m_frames = (ctx->x86_pv.max_pfn + fpp) / fpp;
+
+ DPRINTF("max_pfn %#x, p2m_frames %d", max_pfn, ctx->x86_pv.p2m_frames);
+ }
+
+ return 0;
+}
+
+int x86_pv_map_m2p(struct context *ctx)
+{
+ xc_interface *xch = ctx->xch;
+ long max_page = xc_maximum_ram_page(xch);
+ unsigned long m2p_chunks, m2p_size;
+ privcmd_mmap_entry_t *entries = NULL;
+ xen_pfn_t *extents_start = NULL;
+ int rc = -1, i;
+
+ if ( max_page < 0 )
+ {
+ PERROR("Failed to get maximum ram page");
+ goto err;
+ }
+
+ ctx->x86_pv.max_mfn = max_page;
+ m2p_size = M2P_SIZE(ctx->x86_pv.max_mfn);
+ m2p_chunks = M2P_CHUNKS(ctx->x86_pv.max_mfn);
+
+ extents_start = malloc(m2p_chunks * sizeof(xen_pfn_t));
+ if ( !extents_start )
+ {
+ ERROR("Unable to allocate %zu bytes for m2p mfns",
+ m2p_chunks * sizeof(xen_pfn_t));
+ goto err;
+ }
+
+ if ( xc_machphys_mfn_list(xch, m2p_chunks, extents_start) )
+ {
+ PERROR("Failed to get m2p mfn list");
+ goto err;
+ }
+
+ entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t));
+ if ( !entries )
+ {
+ ERROR("Unable to allocate %zu bytes for m2p mapping mfns",
+ m2p_chunks * sizeof(privcmd_mmap_entry_t));
+ goto err;
+ }
+
+ for ( i = 0; i < m2p_chunks; ++i )
+ entries[i].mfn = extents_start[i];
+
+ ctx->x86_pv.m2p = xc_map_foreign_ranges(
+ xch, DOMID_XEN, m2p_size, PROT_READ,
+ M2P_CHUNK_SIZE, entries, m2p_chunks);
+
+ if ( !ctx->x86_pv.m2p )
+ {
+ PERROR("Failed to mmap m2p ranges");
+ goto err;
+ }
+
+ ctx->x86_pv.nr_m2p_frames = (M2P_CHUNK_SIZE >> PAGE_SHIFT) * m2p_chunks;
+
+#ifdef __i386__
+ /* 32 bit toolstacks automatically get the compat m2p */
+ ctx->x86_pv.compat_m2p_mfn0 = entries[0].mfn;
+#else
+ /* 64 bit toolstacks need to ask Xen specially for it */
+ {
+ struct xen_machphys_mfn_list xmml = {
+ .max_extents = 1,
+ .extent_start = { &ctx->x86_pv.compat_m2p_mfn0 }
+ };
+
+ rc = do_memory_op(xch, XENMEM_machphys_compat_mfn_list,
+ &xmml, sizeof xmml);
+ if ( rc || xmml.nr_extents != 1 )
+ {
+ PERROR("Failed to get compat mfn list from Xen");
+ rc = -1;
+ goto err;
+ }
+ }
+#endif
+
+ /* All Done */
+ rc = 0;
+ DPRINTF("max_mfn %#lx", ctx->x86_pv.max_mfn);
+
+err:
+ free(entries);
+ free(extents_start);
+
+ return rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
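
The PAE branch of cr3_to_mfn()/mfn_to_cr3() above is simply a 32-bit rotate
by 12 bits: the low 20 bits of the mfn land at the top of the cr3 field and
vice versa. A standalone sketch (no Xen headers needed) checking that the
fold is self-inverse, with an arbitrary example frame number:

/* Standalone illustration of the PAE cr3 folding above: mfn_to_cr3 is a
 * rotate-left-by-12 of the 32-bit mfn, cr3_to_mfn the inverse rotate. */
#include <assert.h>
#include <stdint.h>

static uint64_t pae_mfn_to_cr3(uint64_t mfn)
{
    return ((uint32_t)mfn << 12) | ((uint32_t)mfn >> 20);
}

static uint64_t pae_cr3_to_mfn(uint64_t cr3)
{
    return ((uint32_t)cr3 >> 12) | ((uint32_t)cr3 << 20);
}

int main(void)
{
    uint64_t mfn = 0xabcde;   /* arbitrary example frame number */

    assert(pae_mfn_to_cr3(mfn) == 0xabcde000);
    assert(pae_cr3_to_mfn(pae_mfn_to_cr3(mfn)) == mfn);
    return 0;
}
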
diff --git a/tools/libxc/saverestore/common_x86_pv.h b/tools/libxc/saverestore/common_x86_pv.h
new file mode 100644
index 0000000..c7315b6
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86_pv.h
@@ -0,0 +1,104 @@
+#ifndef __COMMON_X86_PV_H
+#define __COMMON_X86_PV_H
+
+#include "common_x86.h"
+
+/*
+ * Convert an mfn to a pfn, given Xen's m2p table.
+ *
+ * Caller must ensure that the requested mfn is in range.
+ */
+xen_pfn_t mfn_to_pfn(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Convert a pfn to an mfn, given the guest's p2m table.
+ *
+ * Caller must ensure that the requested pfn is in range.
+ */
+xen_pfn_t pfn_to_mfn(struct context *ctx, xen_pfn_t pfn);
+
+/*
+ * Set a mapping in the p2m table.
+ *
+ * Caller must ensure that the requested pfn is in range.
+ */
+void set_p2m(struct context *ctx, xen_pfn_t pfn, xen_pfn_t mfn);
+
+/*
+ * Query whether a particular mfn is valid in the physmap of a guest.
+ */
+bool mfn_in_pseudophysmap(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Debug a particular mfn by walking the p2m and m2p.
+ */
+void pseudophysmap_walk(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Convert a PV cr3 field to an mfn.
+ */
+xen_pfn_t cr3_to_mfn(struct context *ctx, uint64_t cr3);
+
+/*
+ * Convert an mfn to a PV cr3 field.
+ */
+uint64_t mfn_to_cr3(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Extract an MFN from a Pagetable Entry.
+ */
+static inline xen_pfn_t pte_to_frame(struct context *ctx, uint64_t pte)
+{
+ if ( ctx->x86_pv.width == 8 )
+ return (pte >> PAGE_SHIFT) & ((1ULL << (52 - PAGE_SHIFT)) - 1);
+ else
+ return (pte >> PAGE_SHIFT) & ((1ULL << (44 - PAGE_SHIFT)) - 1);
+}
+
+static inline void update_pte(struct context *ctx, uint64_t *pte,
+                              xen_pfn_t pfn)
+{
+ if ( ctx->x86_pv.width == 8 )
+ *pte &= ~(((1ULL << (52 - PAGE_SHIFT)) - 1) << PAGE_SHIFT);
+ else
+ *pte &= ~(((1ULL << (44 - PAGE_SHIFT)) - 1) << PAGE_SHIFT);
+
+ *pte |= (uint64_t)pfn << PAGE_SHIFT;
+}
+
+/*
+ * Get current domain information.
+ *
+ * Fills ctx->x86_pv
+ * - .width
+ * - .levels
+ * - .fpp
+ * - .p2m_frames
+ *
+ * Used by the save side to create the X86_PV_INFO record, and by the restore
+ * side to verify the incoming stream.
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+int x86_pv_domain_info(struct context *ctx);
+
+/*
+ * Maps the Xen M2P.
+ *
+ * Fills ctx->x86_pv.
+ * - .max_mfn
+ * - .m2p
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+int x86_pv_map_m2p(struct context *ctx);
+
+#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
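
A worked example of the PTE helpers above, for the 64-bit case: the frame
number occupies bits 12-51, so pte_to_frame() masks that field out and
update_pte() replaces only that field, preserving the flag bits at both
ends. Standalone sketch with illustrative values:

/* Standalone illustration of the 64-bit PTE frame field manipulated by
 * pte_to_frame()/update_pte(): frame in bits 12-51, flags elsewhere. */
#include <assert.h>
#include <stdint.h>

#define EX_PAGE_SHIFT 12
#define EX_FRAME_MASK (((1ULL << (52 - EX_PAGE_SHIFT)) - 1) << EX_PAGE_SHIFT)

int main(void)
{
    uint64_t pte = 0x80000000aabbc067ULL;   /* NX bit, frame 0xaabbc, flags */

    /* Extract the frame, as pte_to_frame() does. */
    assert(((pte & EX_FRAME_MASK) >> EX_PAGE_SHIFT) == 0xaabbcULL);

    /* Re-point at frame 0x12345, as update_pte() does; flags survive. */
    pte = (pte & ~EX_FRAME_MASK) | (0x12345ULL << EX_PAGE_SHIFT);
    assert(pte == 0x8000000012345067ULL);
    return 0;
}
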
diff --git a/tools/libxc/saverestore/restore.c b/tools/libxc/saverestore/restore.c
index 6624baa..5834d38 100644
--- a/tools/libxc/saverestore/restore.c
+++ b/tools/libxc/saverestore/restore.c
@@ -12,6 +12,294 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
return -1;
}
+static bool pfn_is_populated(struct context *ctx, xen_pfn_t pfn)
+{
+ if ( !ctx->restore.populated_pfns || pfn > ctx->restore.max_populated_pfn )
+ return false;
+ return test_bit(pfn, ctx->restore.populated_pfns);
+}
+
+static int pfn_set_populated(struct context *ctx, xen_pfn_t pfn)
+{
+ xc_interface *xch = ctx->xch;
+
+ if ( !ctx->restore.populated_pfns || pfn > ctx->restore.max_populated_pfn )
+ {
+ unsigned long new_max_pfn = ((pfn + 1024) & ~1023) - 1;
+ size_t old_sz, new_sz;
+ unsigned long *p;
+
+ old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1);
+ new_sz = bitmap_size(new_max_pfn + 1);
+
+ p = realloc(ctx->restore.populated_pfns, new_sz);
+ if ( !p )
+ {
+ PERROR("Failed to realloc populated bitmap");
+ return -1;
+ }
+
+ memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);
+
+ ctx->restore.populated_pfns = p;
+ ctx->restore.max_populated_pfn = new_max_pfn;
+ }
+
+ set_bit(pfn, ctx->restore.populated_pfns);
+
+ return 0;
+}
+
+int populate_pfns(struct context *ctx, unsigned count,
+ const xen_pfn_t *original_pfns, const uint32_t *types)
+{
+ xc_interface *xch = ctx->xch;
+ xen_pfn_t *mfns = malloc(count * sizeof *mfns),
+ *pfns = malloc(count * sizeof *pfns);
+ unsigned i, nr_pfns = 0;
+ int rc = -1;
+
+ if ( !mfns || !pfns )
+ {
+ ERROR("Failed to allocate %zu bytes for populating the physmap",
+ 2 * count * sizeof *mfns);
+ goto err;
+ }
+
+ for ( i = 0; i < count; ++i )
+ {
+ if ( types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+ types[i] != XEN_DOMCTL_PFINFO_BROKEN &&
+ !pfn_is_populated(ctx, original_pfns[i]) )
+ {
+ pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
+ ++nr_pfns;
+ }
+ }
+
+ if ( nr_pfns )
+ {
+        rc = xc_domain_populate_physmap_exact(xch, ctx->domid, nr_pfns,
+                                              0, 0, mfns);
+ if ( rc )
+ {
+ PERROR("Failed to populate physmap");
+ goto err;
+ }
+
+ for ( i = 0; i < nr_pfns; ++i )
+ {
+ rc = pfn_set_populated(ctx, pfns[i]);
+ if ( rc )
+ goto err;
+ ctx->ops.set_gfn(ctx, pfns[i], mfns[i]);
+ }
+ }
+
+ rc = 0;
+
+ err:
+ free(pfns);
+ free(mfns);
+
+ return rc;
+}
+
+static int process_page_data(struct context *ctx, unsigned count,
+ xen_pfn_t *pfns, uint32_t *types, void *page_data)
+{
+ xc_interface *xch = ctx->xch;
+ xen_pfn_t *mfns = malloc(count * sizeof *mfns);
+ int *map_errs = malloc(count * sizeof *map_errs);
+ int rc = -1;
+ void *mapping = NULL, *guest_page = NULL;
+ unsigned i, /* i indexes the pfns from the record */
+ j, /* j indexes the subset of pfns we decide to map */
+ nr_pages;
+
+ if ( !mfns || !map_errs )
+ {
+ ERROR("Failed to allocate %zu bytes to process page data",
+ count * (sizeof *mfns + sizeof *map_errs));
+ goto err;
+ }
+
+ rc = populate_pfns(ctx, count, pfns, types);
+ if ( rc )
+ {
+ ERROR("Failed to populate pfns for batch of %u pages", count);
+ goto err;
+ }
+ rc = -1;
+
+ for ( i = 0, nr_pages = 0; i < count; ++i )
+ {
+ ctx->ops.set_page_type(ctx, pfns[i], types[i]);
+
+ switch ( types[i] )
+ {
+ case XEN_DOMCTL_PFINFO_NOTAB:
+
+ case XEN_DOMCTL_PFINFO_L1TAB:
+ case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+ mfns[nr_pages++] = ctx->ops.pfn_to_gfn(ctx, pfns[i]);
+ break;
+ }
+
+ }
+
+ if ( nr_pages > 0 )
+ {
+ mapping = guest_page = xc_map_foreign_bulk(
+ xch, ctx->domid, PROT_READ | PROT_WRITE,
+ mfns, map_errs, nr_pages);
+ if ( !mapping )
+ {
+ PERROR("Unable to map %u mfns for %u pages of data",
+ nr_pages, count);
+ goto err;
+ }
+ }
+
+ for ( i = 0, j = 0; i < count; ++i )
+ {
+ switch ( types[i] )
+ {
+ case XEN_DOMCTL_PFINFO_XTAB:
+ case XEN_DOMCTL_PFINFO_BROKEN:
+ /* Nothing at all to do */
+ case XEN_DOMCTL_PFINFO_XALLOC:
+            /* Nothing further to do */
+ continue;
+ }
+
+ if ( map_errs[j] )
+ {
+ ERROR("Mapping pfn %lx (mfn %lx, type %#"PRIx32")failed with %d",
+ pfns[i], mfns[j], types[i], map_errs[j]);
+ goto err;
+ }
+
+ memcpy(guest_page, page_data, PAGE_SIZE);
+
+ /* Undo page normalisation done by the saver. */
+ rc = ctx->ops.localise_page(ctx, types[i], guest_page);
+ if ( rc )
+ {
+ DPRINTF("Failed to localise");
+ goto err;
+ }
+
+ ++j;
+ guest_page += PAGE_SIZE;
+ page_data += PAGE_SIZE;
+ }
+
+ rc = 0;
+
+ err:
+ if ( mapping )
+ munmap(mapping, nr_pages * PAGE_SIZE);
+
+ free(map_errs);
+ free(mfns);
+
+ return rc;
+}
+
+int handle_page_data(struct context *ctx, struct record *rec)
+{
+ xc_interface *xch = ctx->xch;
+ struct rec_page_data_header *pages = rec->data;
+ unsigned i, pages_of_data = 0;
+ int rc = -1;
+
+ xen_pfn_t *pfns = NULL, pfn;
+ uint32_t *types = NULL, type;
+
+ static unsigned pg_count;
+ pg_count++;
+
+ if ( rec->length < sizeof *pages )
+ {
+ ERROR("PAGE_DATA record trucated: length %"PRIu32", min %zu",
+ rec->length, sizeof *pages);
+ goto err;
+ }
+ else if ( pages->count < 1 )
+ {
+ ERROR("Expected at least 1 pfn in PAGE_DATA record");
+ goto err;
+ }
+    else if ( rec->length < sizeof *pages + (pages->count * sizeof(uint64_t)) )
+ {
+ ERROR("PAGE_DATA record (length %"PRIu32") too short to contain %"
+ PRIu32" pfns worth of information", rec->length, pages->count);
+ goto err;
+ }
+
+ pfns = malloc(pages->count * sizeof *pfns);
+ types = malloc(pages->count * sizeof *types);
+ if ( !pfns || !types )
+ {
+ ERROR("Unable to allocate enough memory for %"PRIu32" pfns",
+ pages->count);
+ goto err;
+ }
+
+ for ( i = 0; i < pages->count; ++i )
+ {
+ pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
+ if ( !ctx->ops.pfn_is_valid(ctx, pfn) )
+ {
+ ERROR("pfn %#lx (index %u) outside domain maximum", pfn, i);
+ goto err;
+ }
+
+ type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
+ if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
+ ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
+ {
+ ERROR("Invalid type %#lx for pfn %#lx (index %u)", type, pfn, i);
+ goto err;
+ }
+ else if ( type < XEN_DOMCTL_PFINFO_BROKEN )
+ /* NOTAB and all L1 thru L4 tables (including pinned) should have
+ * a page worth of data in the record. */
+ pages_of_data++;
+
+ pfns[i] = pfn;
+ types[i] = type;
+ }
+
+ if ( rec->length != (sizeof *pages +
+ (sizeof (uint64_t) * pages->count) +
+ (PAGE_SIZE * pages_of_data)) )
+ {
+ ERROR("PAGE_DATA record wrong size: length %"PRIu32", expected "
+ "%zu + %zu + %zu", sizeof *pages,
+ (sizeof (uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
+ goto err;
+ }
+
+ rc = process_page_data(ctx, pages->count, pfns, types,
+ &pages->pfn[pages->count]);
+ err:
+ free(types);
+ free(pfns);
+
+ return rc;
+}
+
/*
* Local variables:
* mode: C
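
The sizing check in handle_page_data() above encodes the PAGE_DATA layout:
a fixed header, then one 64-bit pfn+type word per entry, then one 4096-byte
page for each entry whose type carries data. A standalone sketch of that
arithmetic; the 8-byte header and field names are assumptions for this
example, the authoritative layout being struct rec_page_data_header in
stream_format.h:

/* Standalone illustration of the PAGE_DATA sizing rule validated above. */
#include <assert.h>
#include <stdint.h>
#include <stddef.h>

struct example_page_data_header
{
    uint32_t count;
    uint32_t _reserved;
    uint64_t pfn[];        /* pfn in low bits, type in bits 32+ */
};

int main(void)
{
    const size_t page_size = 4096;
    uint32_t count = 3;            /* three entries in the record... */
    uint32_t pages_of_data = 2;    /* ...one of which is an XTAB hole */

    size_t expected = sizeof(struct example_page_data_header)
                    + sizeof(uint64_t) * count
                    + page_size * pages_of_data;

    assert(expected == 8 + 24 + 8192);
    return 0;
}
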
diff --git a/tools/libxc/saverestore/save.c b/tools/libxc/saverestore/save.c
index c013e62..e842e6c 100644
--- a/tools/libxc/saverestore/save.c
+++ b/tools/libxc/saverestore/save.c
@@ -1,5 +1,47 @@
+#include <arpa/inet.h>
+
#include "common.h"
+int write_headers(struct context *ctx, uint16_t guest_type)
+{
+ xc_interface *xch = ctx->xch;
+ int32_t xen_version = xc_version(xch, XENVER_version, NULL);
+ struct ihdr ihdr =
+ {
+ .marker = IHDR_MARKER,
+ .id = htonl(IHDR_ID),
+ .version = htonl(IHDR_VERSION),
+ .options = htons(IHDR_OPT_LITTLE_ENDIAN),
+ };
+ struct dhdr dhdr =
+ {
+ .type = guest_type,
+ .page_shift = 12,
+ .xen_major = (xen_version >> 16) & 0xffff,
+ .xen_minor = (xen_version) & 0xffff,
+ };
+
+ if ( xen_version < 0 )
+ {
+ PERROR("Unable to obtain Xen Version");
+ return -1;
+ }
+
+ if ( write_exact(ctx->fd, &ihdr, sizeof ihdr) )
+ {
+ PERROR("Unable to write Image Header to stream");
+ return -1;
+ }
+
+ if ( write_exact(ctx->fd, &dhdr, sizeof dhdr) )
+ {
+ PERROR("Unable to write Domain Header to stream");
+ return -1;
+ }
+
+ return 0;
+}
+
 int xc_domain_save2(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
uint32_t max_factor, uint32_t flags,
struct save_callbacks* callbacks, int hvm,
--
1.7.10.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel