[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v4 5/9] tools/libxc: common code



Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx>
Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
 tools/libxc/saverestore/common.c         |   87 ++++++
 tools/libxc/saverestore/common.h         |  172 ++++++++++++
 tools/libxc/saverestore/common_x86.c     |   54 ++++
 tools/libxc/saverestore/common_x86.h     |   21 ++
 tools/libxc/saverestore/common_x86_hvm.c |   53 ++++
 tools/libxc/saverestore/common_x86_pv.c  |  431 ++++++++++++++++++++++++++++++
 tools/libxc/saverestore/common_x86_pv.h  |  104 +++++++
 tools/libxc/saverestore/restore.c        |  288 ++++++++++++++++++++
 tools/libxc/saverestore/save.c           |   42 +++
 9 files changed, 1252 insertions(+)
 create mode 100644 tools/libxc/saverestore/common_x86.c
 create mode 100644 tools/libxc/saverestore/common_x86.h
 create mode 100644 tools/libxc/saverestore/common_x86_hvm.c
 create mode 100644 tools/libxc/saverestore/common_x86_pv.c
 create mode 100644 tools/libxc/saverestore/common_x86_pv.h

diff --git a/tools/libxc/saverestore/common.c b/tools/libxc/saverestore/common.c
index de2e727..b159c4c 100644
--- a/tools/libxc/saverestore/common.c
+++ b/tools/libxc/saverestore/common.c
@@ -1,3 +1,5 @@
+#include <assert.h>
+
 #include "common.h"
 
 static const char *dhdr_types[] =
@@ -52,6 +54,91 @@ const char *rec_type_to_str(uint32_t type)
     return "Reserved";
 }
 
+/*
+ * Write a record whose payload is split across two buffers: rec->data
+ * (rec->length bytes) followed by buf (sz bytes).  The header carries the
+ * combined, unpadded length; the payload is then zero-padded up to a
+ * multiple of 8 bytes.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int write_split_record(struct context *ctx, struct record *rec,
+                       void *buf, size_t sz)
+{
+    static const char zeroes[7] = { 0 };
+    xc_interface *xch = ctx->xch;
+    uint32_t combined_length = 0;
+    size_t record_length = 0;
+    /* Check each part before summing so the 32bit total cannot wrap. */
+    int too_long = (sz > REC_LENGTH_MAX) ||
+                   (rec->length > REC_LENGTH_MAX - sz);
+
+    if ( !too_long )
+    {
+        combined_length = rec->length + (uint32_t)sz;
+        /* Length as it appears in the stream, rounded up to 8 bytes. */
+        record_length = ((size_t)combined_length + 7) & ~(size_t)7;
+        too_long = record_length > REC_LENGTH_MAX;
+    }
+
+    if ( too_long )
+    {
+        /* Report both parts: the caller may have overflowed with either. */
+        ERROR("Record (0x%08"PRIx32", %s) length 0x%"PRIx32" + 0x%zx"
+              " exceeds max (0x%"PRIx32")", rec->type,
+              rec_type_to_str(rec->type), rec->length, sz, REC_LENGTH_MAX);
+        return -1;
+    }
+
+    if ( rec->length )
+        assert(rec->data);
+    if ( sz )
+        assert(buf);
+
+    /* Header (type, length), both payload parts, then the zero padding. */
+    if ( write_exact(ctx->fd, &rec->type, sizeof rec->type) ||
+         write_exact(ctx->fd, &combined_length, sizeof combined_length) ||
+         (rec->length && write_exact(ctx->fd, rec->data, rec->length)) ||
+         (sz && write_exact(ctx->fd, buf, sz)) ||
+         write_exact(ctx->fd, zeroes, record_length - combined_length) )
+    {
+        PERROR("Unable to write record to stream");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Read one record from the stream into rec.  The header gives the unpadded
+ * payload length; the amount actually read into rec->data is that length
+ * rounded up to a multiple of 8 bytes.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int read_record(struct context *ctx, struct record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct rhdr rhdr;
+    size_t datasz;
+
+    if ( read_exact(ctx->fd, &rhdr, sizeof rhdr) )
+    {
+        PERROR("Failed to read Record Header from stream");
+        return -1;
+    }
+    else if ( rhdr.length > REC_LENGTH_MAX )
+    {
+        ERROR("Record (0x%08"PRIx32", %s) length 0x%"PRIx32
+              " exceeds max (0x%"PRIx32")",
+              rhdr.type, rec_type_to_str(rhdr.type),
+              rhdr.length, REC_LENGTH_MAX);
+        return -1;
+    }
+
+    /* Payload size including the padding up to the next 8 byte boundary. */
+    datasz = (rhdr.length + 7) & ~7U;
+
+    if ( datasz )
+    {
+        rec->data = malloc(datasz);
+
+        if ( !rec->data )
+        {
+            ERROR("Unable to allocate %zu bytes for record data"
+                  " (0x%08"PRIx32", %s)",
+                  datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            return -1;
+        }
+
+        if ( read_exact(ctx->fd, rec->data, datasz) )
+        {
+            /* Report before freeing so free() cannot disturb errno. */
+            PERROR("Failed to read %zu bytes of data for record"
+                   " (0x%08"PRIx32", %s)",
+                   datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            free(rec->data);
+            rec->data = NULL;
+            return -1;
+        }
+    }
+    else
+        rec->data = NULL;
+
+    rec->type   = rhdr.type;
+    rec->length = rhdr.length;
+
+    return 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h
index fff0a39..a35eda7 100644
--- a/tools/libxc/saverestore/common.h
+++ b/tools/libxc/saverestore/common.h
@@ -1,7 +1,20 @@
 #ifndef __COMMON__H
 #define __COMMON__H
 
+#include <stdbool.h>
+
+// Hack out junk from the namespace
+#define mfn_to_pfn __UNUSED_mfn_to_pfn
+#define pfn_to_mfn __UNUSED_pfn_to_mfn
+
 #include "../xg_private.h"
+#include "../xg_save_restore.h"
+#include "../xc_dom.h"
+#include "../xc_bitops.h"
+
+#undef mfn_to_pfn
+#undef pfn_to_mfn
+
 
 #include "stream_format.h"
 
@@ -11,6 +24,165 @@
 const char *dhdr_type_to_str(uint32_t type);
 const char *rec_type_to_str(uint32_t type);
 
+struct context;
+
+/*
+ * Guest-type specific hooks.  Two instances are declared in this header:
+ * save_restore_ops_x86_pv and save_restore_ops_x86_hvm.
+ */
+struct save_restore_ops
+{
+    /* Is pfn acceptable for this guest?  (PV: pfn <= max_pfn; HVM: always.) */
+    bool (*pfn_is_valid)(struct context *ctx, xen_pfn_t pfn);
+    /* Translate a pfn to a gfn.  (PV: p2m lookup; HVM: identity.) */
+    xen_pfn_t (*pfn_to_gfn)(struct context *ctx, xen_pfn_t pfn);
+    /* Record the gfn backing pfn.  (PV: p2m update; HVM: no-op.) */
+    void (*set_gfn)(struct context *ctx, xen_pfn_t pfn, xen_pfn_t gfn);
+    /* Record the page type of pfn.  (PV: stored in pfn_types; HVM: no-op.) */
+    void (*set_page_type)(struct context *ctx, xen_pfn_t pfn, xen_pfn_t type);
+    /*
+     * Prepare a page for the stream.  The PV implementation replaces *page
+     * with a malloc()ed copy for pagetable types.
+     * NOTE(review): who frees that copy is decided by the caller - confirm.
+     */
+    int (*normalise_page)(struct context *ctx, xen_pfn_t type, void **page);
+    /*
+     * Inverse of normalise_page, applied to page in place.
+     * NOTE(review): type is uint32_t here but xen_pfn_t in normalise_page.
+     */
+    int (*localise_page)(struct context *ctx, uint32_t type, void *page);
+};
+
+/*
+ * State carried through a save or restore operation.  Holds the common
+ * handles, one of the save/restore specific sub-structures, and the guest
+ * type specific (currently x86 PV) data.
+ */
+struct context
+{
+    xc_interface *xch;
+    uint32_t domid;
+    /* Stream file descriptor, as passed to read_exact()/write_exact(). */
+    int fd;
+
+    xc_dominfo_t dominfo;
+
+    /* Guest-type specific hooks. */
+    struct save_restore_ops ops;
+
+    /* Direction specific state; the two members share storage. */
+    union
+    {
+        struct
+        {
+            /* From Image Header */
+            uint32_t format_version;
+
+            /* From Domain Header */
+            uint32_t guest_type;
+            uint32_t guest_page_size;
+
+            unsigned long xenstore_mfn, console_mfn;
+            unsigned int xenstore_evtchn, console_evtchn;
+            domid_t xenstore_domid, console_domid;
+
+            struct restore_callbacks *callbacks;
+
+            /* Bitmap of currently populated PFNs during restore. */
+            unsigned long *populated_pfns;
+            /* Highest pfn the populated_pfns bitmap can currently hold. */
+            unsigned int max_populated_pfn;
+        } restore;
+
+        struct
+        {
+            unsigned long p2m_size;
+
+            struct save_callbacks *callbacks;
+        } save;
+    };
+
+    /* NOTE(review): batching state; its users are not in this part of the
+     * series - confirm semantics against save.c / restore.c. */
+    xen_pfn_t *batch_pfns;
+    unsigned nr_batch_pfns;
+    unsigned long *deferred_pages;
+
+    /* Guest type specific state. */
+    union
+    {
+        struct
+        {
+            /* 4 or 8; 32 or 64 bit domain */
+            unsigned int width;
+            /* 3 or 4 pagetable levels */
+            unsigned int levels;
+
+
+            /* Maximum Xen frame */
+            unsigned long max_mfn;
+            /* Read-only machine to phys map */
+            xen_pfn_t *m2p;
+            /* first mfn of the compat m2p (Only needed for 32bit PV guests) */
+            xen_pfn_t compat_m2p_mfn0;
+            /* Number of m2p frames mapped */
+            unsigned long nr_m2p_frames;
+
+
+            /* Maximum guest frame */
+            unsigned long max_pfn;
+            /* Frames per page in guest p2m */
+            unsigned int fpp;
+
+            /* Number of frames making up the p2m */
+            unsigned int p2m_frames;
+            /* Guest's phys to machine map.  Mapped read-only (save) or
+             * allocated locally (restore).  Uses guest unsigned longs. */
+            void *p2m;
+            /* The guest pfns containing the p2m leaves */
+            xen_pfn_t *p2m_pfns;
+            /* Types for each page */
+            uint32_t *pfn_types;
+
+            /* Read-only mapping of guests shared info page */
+            shared_info_any_t *shinfo;
+        } x86_pv;
+    };
+};
+
+/*
+ * Write the image and domain headers to the stream.
+ * (to eventually make static in save.c)
+ */
+int write_headers(struct context *ctx, uint16_t guest_type);
+
+extern struct save_restore_ops save_restore_ops_x86_pv;
+extern struct save_restore_ops save_restore_ops_x86_hvm;
+
+/*
+ * In-memory form of a stream record.
+ */
+struct record
+{
+    /* Record type, as understood by rec_type_to_str(). */
+    uint32_t type;
+    /* Payload length in bytes as carried in the header (excludes padding). */
+    uint32_t length;
+    /* Payload; ignored on write and NULL after read when length is 0. */
+    void *data;
+};
+
+/*
+ * Writes a split record to the stream, applying correct padding where
+ * appropriate.  It is common when sending records containing blobs from Xen
+ * that the header and blob data are separate.  This function accepts a second
+ * buffer and length, and will merge it with the main record when sending.
+ *
+ * Records with a non-zero length must provide a valid data field; records
+ * with a 0 length shall have their data field ignored.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int write_split_record(struct context *ctx, struct record *rec,
+                       void *buf, size_t sz);
+
+/*
+ * Writes a record to the stream, applying correct padding where appropriate.
+ * Records with a non-zero length must provide a valid data field; records
+ * with a 0 length shall have their data field ignored.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static inline int write_record(struct context *ctx, struct record *rec)
+{
+    /* A plain record is a split record with an empty second buffer. */
+    void *no_extra_buf = NULL;
+    const size_t no_extra_len = 0;
+
+    return write_split_record(ctx, rec, no_extra_buf, no_extra_len);
+}
+
+/*
+ * Reads a record from the stream, and fills in the record structure.
+ *
+ * Returns 0 on success and non-0 on failure.
+ *
+ * On success, the records type and size shall be valid.
+ * - If size is 0, data shall be NULL.
+ * - If size is non-0, data shall be a buffer allocated by malloc() which must
+ *   be passed to free() by the caller.
+ *
+ * On failure, the contents of the record structure are undefined.
+ */
+int read_record(struct context *ctx, struct record *rec);
+
+int write_page_data_and_pause(struct context *ctx);
+
+int handle_page_data(struct context *ctx, struct record *rec);
+
+int populate_pfns(struct context *ctx, unsigned count,
+                  const xen_pfn_t *original_pfns, const uint32_t *types);
+
 #endif
 /*
  * Local variables:
diff --git a/tools/libxc/saverestore/common_x86.c b/tools/libxc/saverestore/common_x86.c
new file mode 100644
index 0000000..0a3d555
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86.c
@@ -0,0 +1,54 @@
+#include "common_x86.h"
+
+/* Query Xen for the domain's TSC settings and emit them as one record. */
+int write_tsc_info(struct context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    struct rec_tsc_info info = { 0 };
+    struct record rec;
+    int ret;
+
+    ret = xc_domain_get_tsc_info(xch, ctx->domid, &info.mode,
+                                 &info.nsec, &info.khz, &info.incarnation);
+    if ( ret < 0 )
+    {
+        PERROR("Unable to obtain TSC information");
+        return -1;
+    }
+
+    /* The record payload is the structure just filled in by Xen. */
+    rec.type   = REC_TYPE_tsc_info;
+    rec.length = sizeof info;
+    rec.data   = &info;
+
+    return write_record(ctx, &rec);
+}
+
+/* Validate a TSC_INFO record and apply its contents to the domain. */
+int handle_tsc_info(struct context *ctx, struct record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    const size_t expected = sizeof(struct rec_tsc_info);
+    struct rec_tsc_info *info;
+    int ret;
+
+    /* Reject anything which is not exactly one rec_tsc_info. */
+    if ( rec->length != expected )
+    {
+        ERROR("TSC_INFO record wrong size: length %"PRIu32", expected %zu",
+              rec->length, expected);
+        return -1;
+    }
+
+    info = rec->data;
+
+    ret = xc_domain_set_tsc_info(xch, ctx->domid, info->mode,
+                                 info->nsec, info->khz, info->incarnation);
+    if ( ret != 0 )
+    {
+        PERROR("Unable to set TSC information");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxc/saverestore/common_x86.h b/tools/libxc/saverestore/common_x86.h
new file mode 100644
index 0000000..429532a
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86.h
@@ -0,0 +1,21 @@
+#ifndef __COMMON_X86__H
+#define __COMMON_X86__H
+
+#include "common.h"
+
+/* Obtains and writes domain TSC information to the stream */
+int write_tsc_info(struct context *ctx);
+
+/* Parses domain TSC information from the stream */
+int handle_tsc_info(struct context *ctx, struct record *rec);
+
+#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxc/saverestore/common_x86_hvm.c b/tools/libxc/saverestore/common_x86_hvm.c
new file mode 100644
index 0000000..0b9aac2
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86_hvm.c
@@ -0,0 +1,53 @@
+#include "common.h"
+
+/*
+ * x86 HVM implementation of the save_restore_ops hooks.  Every hook below
+ * is either an identity mapping or a no-op.
+ */
+
+/* Every pfn is reported as valid. */
+static bool x86_hvm_pfn_is_valid(struct context *ctx, xen_pfn_t pfn)
+{
+    return true;
+}
+
+/* The gfn is the pfn itself. */
+static xen_pfn_t x86_hvm_pfn_to_gfn(struct context *ctx, xen_pfn_t pfn)
+{
+    return pfn;
+}
+
+static void x86_hvm_set_gfn(struct context *ctx, xen_pfn_t pfn,
+                            xen_pfn_t gfn)
+{
+    /* no-op */
+}
+
+static void x86_hvm_set_page_type(struct context *ctx, xen_pfn_t pfn,
+                                  xen_pfn_t type)
+{
+    /* no-op */
+}
+
+/* Leaves *page untouched and reports success. */
+static int x86_hvm_normalise_page(struct context *ctx, xen_pfn_t type,
+                                  void **page)
+{
+    /* no-op */
+    return 0;
+}
+
+/* Leaves page untouched and reports success. */
+static int x86_hvm_localise_page(struct context *ctx, uint32_t type,
+                                 void *page)
+{
+    /* no-op */
+    return 0;
+}
+
+struct save_restore_ops save_restore_ops_x86_hvm = {
+    .pfn_is_valid   = x86_hvm_pfn_is_valid,
+    .pfn_to_gfn     = x86_hvm_pfn_to_gfn,
+    .set_gfn        = x86_hvm_set_gfn,
+    .set_page_type  = x86_hvm_set_page_type,
+    .normalise_page = x86_hvm_normalise_page,
+    .localise_page  = x86_hvm_localise_page
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxc/saverestore/common_x86_pv.c b/tools/libxc/saverestore/common_x86_pv.c
new file mode 100644
index 0000000..35bce27
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86_pv.c
@@ -0,0 +1,431 @@
+#include <assert.h>
+
+#include "common_x86_pv.h"
+
+/* Look mfn up in the mapped m2p.  The mfn must not exceed max_mfn. */
+xen_pfn_t mfn_to_pfn(struct context *ctx, xen_pfn_t mfn)
+{
+    const xen_pfn_t *m2p_table = ctx->x86_pv.m2p;
+
+    assert(mfn <= ctx->x86_pv.max_mfn);
+
+    return m2p_table[mfn];
+}
+
+/* A pfn is valid when it does not exceed the guest's maximum pfn. */
+static bool x86_pv_pfn_is_valid(struct context *ctx, xen_pfn_t pfn)
+{
+    const unsigned long highest_pfn = ctx->x86_pv.max_pfn;
+
+    if ( pfn > highest_pfn )
+        return false;
+
+    return true;
+}
+
+/*
+ * Read the p2m entry for pfn, interpreting the table with the guest's own
+ * entry width rather than the toolstack's.
+ */
+static xen_pfn_t x86_pv_pfn_to_gfn(struct context *ctx, xen_pfn_t pfn)
+{
+    uint32_t entry32;
+
+    assert(pfn <= ctx->x86_pv.max_pfn);
+
+    /* 64 bit guest.  Need to truncate their pfns for 32 bit toolstacks */
+    if ( ctx->x86_pv.width == sizeof (uint64_t) )
+    {
+        const uint64_t *p2m64 = ctx->x86_pv.p2m;
+
+        return p2m64[pfn];
+    }
+
+    /* 32 bit guest.  Need to expand INVALID_MFN for 64 bit toolstacks */
+    entry32 = ((uint32_t *)ctx->x86_pv.p2m)[pfn];
+    if ( entry32 == ~0U )
+        return INVALID_MFN;
+
+    return entry32;
+}
+
+/*
+ * Record the type of pfn in the pfn_types array.  The signature uses
+ * xen_pfn_t for type so that it matches the set_page_type hook declared in
+ * struct save_restore_ops.
+ */
+static void x86_pv_set_page_type(struct context *ctx, xen_pfn_t pfn,
+                                 xen_pfn_t type)
+{
+    assert(pfn <= ctx->x86_pv.max_pfn);
+
+    /* pfn_types entries are 32 bits wide; the value is narrowed on store. */
+    ctx->x86_pv.pfn_types[pfn] = type;
+}
+
+/*
+ * Store mfn as the p2m entry for pfn, writing the entry with the guest's
+ * own width.
+ */
+static void x86_pv_set_gfn(struct context *ctx, xen_pfn_t pfn,
+                           xen_pfn_t mfn)
+{
+    assert(pfn <= ctx->x86_pv.max_pfn);
+
+    if ( ctx->x86_pv.width == sizeof (uint64_t) )
+    {
+        /* 64 bit guest.  Need to expand INVALID_MFN for 32 bit toolstacks */
+        uint64_t *p2m64 = ctx->x86_pv.p2m;
+
+        if ( mfn == INVALID_MFN )
+            p2m64[pfn] = ~0ULL;
+        else
+            p2m64[pfn] = mfn;
+    }
+    else
+    {
+        /* 32 bit guest.  Can safely truncate INVALID_MFN for 64 bit
+         * toolstacks */
+        uint32_t *p2m32 = ctx->x86_pv.p2m;
+
+        p2m32[pfn] = mfn;
+    }
+}
+
+/*
+ * Copy the pagetable page src into dst, rewriting the frame of every present
+ * entry from an mfn to the corresponding pfn, and zeroing the entries which
+ * are identified as Xen mappings.
+ *
+ * Returns 0 on success.  On failure returns -1 with errno set:
+ *  - EINVAL: an L4 table was found for a 3-level guest
+ *  - E2BIG:  an entry has _PAGE_PSE set
+ *  - EAGAIN: an mfn is not in the pseudophysmap and the domain is not paused
+ *  - ERANGE: as above, but the domain is paused
+ */
+static int normalise_pagetable(struct context *ctx, const uint64_t *src,
+                               uint64_t *dst, unsigned long type)
+{
+    xc_interface *xch = ctx->xch;
+    uint64_t pte;
+    /* -1 wraps to UINT_MAX, so by default no index is treated as Xen's. */
+    unsigned i, xen_first = -1, xen_last = -1; /* Indices of Xen mappings */
+
+    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+    if ( ctx->x86_pv.levels == 4 )
+    {
+        /* 64bit guests only have Xen mappings in their L4 tables */
+        if ( type == XEN_DOMCTL_PFINFO_L4TAB )
+        {
+            xen_first = 256;
+            xen_last = 271;
+        }
+    }
+    else
+    {
+        switch ( type )
+        {
+        case XEN_DOMCTL_PFINFO_L4TAB:
+            ERROR("??? Found L4 table for 32bit guest");
+            errno = EINVAL;
+            return -1;
+
+        case XEN_DOMCTL_PFINFO_L3TAB:
+            /* 32bit guests can only use the first 4 entries of their L3
+             * tables.  All other are potentially used by Xen. */
+            xen_first = 4;
+            xen_last = 512;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+            /* It is hard to spot Xen mappings in a 32bit guest's L2.  Most
+             * are normal but only a few will have Xen mappings.
+             *
+             * 428 = (HYPERVISOR_VIRT_START_PAE >> L2_PAGETABLE_SHIFT_PAE)
+             *       & 0x1ff
+             *
+             * ...which is conveniently unavailable to us in a 64bit build.
+             */
+            if ( pte_to_frame(ctx, src[428]) == ctx->x86_pv.compat_m2p_mfn0 )
+            {
+                xen_first = 428;
+                xen_last = 512;
+            }
+            break;
+        }
+    }
+
+    /* Every entry is handled as 64 bits wide, whatever the guest width. */
+    for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
+    {
+        xen_pfn_t mfn, pfn;
+
+        pte = src[i];
+
+        /* Remove Xen mappings: Xen will reconstruct on the other side */
+        if ( i >= xen_first && i <= xen_last )
+            pte = 0;
+
+        if ( pte & _PAGE_PRESENT )
+        {
+            mfn = pte_to_frame(ctx, pte);
+
+            if ( pte & _PAGE_PSE )
+            {
+                ERROR("Cannot migrate superpage (L%lu[%u]: 0x%016"PRIx64")",
+                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
+                errno = E2BIG;
+                return -1;
+            }
+
+            if ( !mfn_in_pseudophysmap(ctx, mfn) )
+            {
+                /* This is expected during the live part of migration given
+                 * split pagetable updates, active grant mappings etc.  The
+                 * pagetable will need to be resent after pausing.  It is
+                 * however fatal if we have already paused the domain. */
+                if ( !ctx->dominfo.paused )
+                    errno = EAGAIN;
+                else
+                {
+                    ERROR("Bad MFN for L%lu[%u]",
+                          type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i);
+                    pseudophysmap_walk(ctx, mfn);
+                    errno = ERANGE;
+                }
+                return -1;
+            }
+            else
+                pfn = mfn_to_pfn(ctx, mfn);
+
+            /* Swap the mfn in the entry for its pfn; other bits are kept. */
+            update_pte(ctx, &pte, pfn);
+        }
+
+        dst[i] = pte;
+    }
+
+    return 0;
+}
+
+/*
+ * normalise_page hook for PV guests.  Pages which are not L1-L4 pagetables
+ * are left alone.  For a pagetable, a scratch page is allocated, filled by
+ * normalise_pagetable(), and handed back through *page.
+ *
+ * NOTE(review): *page is switched to the scratch page even when
+ * normalise_pagetable() fails, so the caller owns it in both cases -
+ * confirm the caller frees it on the error path.
+ */
+static int x86_pv_normalise_page(struct context *ctx, xen_pfn_t type,
+                                 void **page)
+{
+    xc_interface *xch = ctx->xch;
+    const xen_pfn_t tabtype = type & XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+    void *scratch;
+    int ret;
+
+    /* Only page tables need normalising. */
+    if ( tabtype < XEN_DOMCTL_PFINFO_L1TAB ||
+         tabtype > XEN_DOMCTL_PFINFO_L4TAB )
+        return 0;
+
+    scratch = malloc(PAGE_SIZE);
+    if ( !scratch )
+    {
+        ERROR("Unable to allocate scratch page");
+        return -1;
+    }
+
+    ret = normalise_pagetable(ctx, *page, scratch, tabtype);
+    *page = scratch;
+
+    return ret;
+}
+
+/*
+ * localise_page hook for PV guests.  For L1-L4 pagetable pages, rewrites in
+ * place the frame of every present entry from a pfn to the mfn backing it,
+ * populating the pfn first if the p2m has no mfn for it yet.  Other page
+ * types are left alone.
+ *
+ * Returns 0 on success.  Returns -1 if populating fails, or with errno set
+ * to ERANGE if the resulting mfn is not in the pseudophysmap.
+ */
+static int x86_pv_localise_page(struct context *ctx, uint32_t type, void *page)
+{
+    xc_interface *xch = ctx->xch;
+    uint64_t *table = page;
+    uint64_t pte;
+    unsigned i;
+
+    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+    /* Only page tables need localisation. */
+    if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
+        return 0;
+
+    for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
+    {
+        xen_pfn_t mfn, pfn;
+
+        pte = table[i];
+
+        if ( !(pte & _PAGE_PRESENT) )
+            continue;
+
+        pfn = pte_to_frame(ctx, pte);
+        mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+
+        if ( mfn == INVALID_MFN )
+        {
+            /* Not backed yet: populate it, then look it up again. */
+            if ( populate_pfns(ctx, 1, &pfn, &type) )
+                return -1;
+
+            mfn = ctx->ops.pfn_to_gfn(ctx, pfn);
+        }
+
+        if ( !mfn_in_pseudophysmap(ctx, mfn) )
+        {
+            /* type is a uint32_t here, so it must not be printed as %lu. */
+            ERROR("Bad MFN for L%"PRIu32"[%u]",
+                  type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i);
+            pseudophysmap_walk(ctx, mfn);
+            errno = ERANGE;
+            return -1;
+        }
+
+        update_pte(ctx, &pte, mfn);
+
+        table[i] = pte;
+    }
+
+    return 0;
+}
+
+/* Hook table for x86 PV guests, listed in struct declaration order. */
+struct save_restore_ops save_restore_ops_x86_pv = {
+    .pfn_is_valid   = x86_pv_pfn_is_valid,
+    .pfn_to_gfn     = x86_pv_pfn_to_gfn,
+    .set_gfn        = x86_pv_set_gfn,
+    .set_page_type  = x86_pv_set_page_type,
+    .normalise_page = x86_pv_normalise_page,
+    .localise_page  = x86_pv_localise_page
+};
+
+/*
+ * An mfn belongs to the guest's pseudophysmap when it is within the m2p,
+ * the m2p yields a pfn within the guest's range, and the guest's p2m maps
+ * that pfn back to the same mfn.
+ */
+bool mfn_in_pseudophysmap(struct context *ctx, xen_pfn_t mfn)
+{
+    xen_pfn_t pfn;
+
+    /* Range check first: mfn_to_pfn() indexes the m2p with mfn. */
+    if ( mfn > ctx->x86_pv.max_mfn )
+        return false;
+
+    pfn = mfn_to_pfn(ctx, mfn);
+    if ( pfn > ctx->x86_pv.max_pfn )
+        return false;
+
+    /* Compare the p2m entry with mfn; the pfn itself is the lookup key. */
+    return ctx->ops.pfn_to_gfn(ctx, pfn) == mfn;
+}
+
+/*
+ * Log the m2p entry for mfn and, if that is a usable pfn, the p2m entry
+ * for it.  Each step is skipped when its input is ~0UL or out of range.
+ */
+void pseudophysmap_walk(struct context *ctx, xen_pfn_t mfn)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t pfn;
+
+    ERROR("mfn %#lx, max %#lx", mfn, ctx->x86_pv.max_mfn);
+
+    /* Nothing to look up in the m2p for an invalid or out of range mfn. */
+    if ( (mfn == ~0UL) || (mfn > ctx->x86_pv.max_mfn) )
+        return;
+
+    pfn = ctx->x86_pv.m2p[mfn];
+    ERROR("  m2p[%#lx] = %#lx, max_pfn %#lx",
+          mfn, pfn, ctx->x86_pv.max_pfn);
+
+    /* Likewise for the p2m. */
+    if ( (pfn == ~0UL) || (pfn > ctx->x86_pv.max_pfn) )
+        return;
+
+    ERROR("  p2m[%#lx] = %#lx",
+          pfn, ctx->ops.pfn_to_gfn(ctx, pfn));
+}
+
+/*
+ * Extract the mfn from a PV cr3 value.  For 8-byte-wide guests this is a
+ * plain shift; otherwise the low 32 bits are rotated right by 12.
+ */
+xen_pfn_t cr3_to_mfn(struct context *ctx, uint64_t cr3)
+{
+    uint32_t low;
+
+    if ( ctx->x86_pv.width == 8 )
+        return cr3 >> 12;
+
+    low = (uint32_t)cr3;
+
+    return (low >> 12) | (low << 20);
+}
+
+/*
+ * Build a PV cr3 value from an mfn.  For 8-byte-wide guests this is a plain
+ * shift; otherwise the low 32 bits are rotated left by 12.
+ */
+uint64_t mfn_to_cr3(struct context *ctx, xen_pfn_t mfn)
+{
+    uint32_t low;
+
+    if ( ctx->x86_pv.width == 8 )
+        return ((uint64_t)mfn) << 12;
+
+    low = (uint32_t)mfn;
+
+    return (low << 12) | (low >> 20);
+}
+
+/*
+ * Query the domain's width and maximum pfn, and derive the pagetable
+ * levels, frames-per-page and p2m frame count from them.
+ *
+ * Fills ctx->x86_pv .width, .levels and .fpp, and (when the reported
+ * maximum pfn is greater than 0) .max_pfn and .p2m_frames.
+ *
+ * Returns 0 on success and -1 on error.
+ */
+int x86_pv_domain_info(struct context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int guest_width, guest_levels, fpp;
+    int max_pfn;
+
+    /* Get the domain width */
+    if ( xc_domain_get_guest_width(xch, ctx->domid, &guest_width) )
+    {
+        PERROR("Unable to determine dom%d's width", ctx->domid);
+        return -1;
+    }
+    else if ( guest_width == 4 )
+        guest_levels = 3;
+    else if ( guest_width == 8 )
+        guest_levels = 4;
+    else
+    {
+        /* guest_width is in bytes; report bits to match the message. */
+        ERROR("Invalid guest width %u.  Expected 32 or 64", guest_width * 8);
+        return -1;
+    }
+    ctx->x86_pv.width = guest_width;
+    ctx->x86_pv.levels = guest_levels;
+    ctx->x86_pv.fpp = fpp = PAGE_SIZE / ctx->x86_pv.width;
+
+    DPRINTF("%u bits, %u levels", guest_width * 8, guest_levels);
+
+    /* Get the domains maximum pfn */
+    max_pfn = xc_domain_maximum_gpfn(xch, ctx->domid);
+    if ( max_pfn < 0 )
+    {
+        PERROR("Unable to obtain guests max pfn");
+        return -1;
+    }
+    else if ( max_pfn >= ~XEN_DOMCTL_PFINFO_LTAB_MASK )
+    {
+        /* The pfn would collide with the page type bits. */
+        errno = E2BIG;
+        PERROR("Cannot save a guest this large %#x", (unsigned int)max_pfn);
+        return -1;
+    }
+    else if ( max_pfn > 0 )
+    {
+        ctx->x86_pv.max_pfn = max_pfn;
+        /* Frames needed to hold max_pfn + 1 entries, rounded up. */
+        ctx->x86_pv.p2m_frames = (ctx->x86_pv.max_pfn + fpp) / fpp;
+
+        DPRINTF("max_pfn %#x, p2m_frames %u",
+                (unsigned int)max_pfn, ctx->x86_pv.p2m_frames);
+    }
+
+    return 0;
+}
+
+/*
+ * Map Xen's machine-to-phys table read-only into the toolstack, and find
+ * the first mfn of the compat m2p.
+ *
+ * Fills ctx->x86_pv .max_mfn, .m2p, .nr_m2p_frames and .compat_m2p_mfn0.
+ *
+ * Returns 0 on success and -1 on error.  The temporary mfn and mmap-entry
+ * arrays are freed on every path.
+ * NOTE(review): if the compat mfn query fails, .m2p is left mapped here;
+ * presumably a cleanup routine elsewhere unmaps it - confirm.
+ */
+int x86_pv_map_m2p(struct context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    long max_page = xc_maximum_ram_page(xch);
+    unsigned long m2p_chunks, m2p_size;
+    privcmd_mmap_entry_t *entries = NULL;
+    xen_pfn_t *extents_start = NULL;
+    /* NOTE(review): i is a signed int but is compared against the unsigned
+     * long m2p_chunks in the loop below. */
+    int rc = -1, i;
+
+    if ( max_page < 0 )
+    {
+        PERROR("Failed to get maximum ram page");
+        goto err;
+    }
+
+    ctx->x86_pv.max_mfn = max_page;
+    m2p_size   = M2P_SIZE(ctx->x86_pv.max_mfn);
+    m2p_chunks = M2P_CHUNKS(ctx->x86_pv.max_mfn);
+
+    /* One mfn per m2p chunk, filled in by Xen below. */
+    extents_start = malloc(m2p_chunks * sizeof(xen_pfn_t));
+    if ( !extents_start )
+    {
+        ERROR("Unable to allocate %zu bytes for m2p mfns",
+              m2p_chunks * sizeof(xen_pfn_t));
+        goto err;
+    }
+
+    if ( xc_machphys_mfn_list(xch, m2p_chunks, extents_start) )
+    {
+        PERROR("Failed to get m2p mfn list");
+        goto err;
+    }
+
+    /* The same mfns again, in the form xc_map_foreign_ranges() takes. */
+    entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t));
+    if ( !entries )
+    {
+        ERROR("Unable to allocate %zu bytes for m2p mapping mfns",
+              m2p_chunks * sizeof(privcmd_mmap_entry_t));
+        goto err;
+    }
+
+    for ( i = 0; i < m2p_chunks; ++i )
+        entries[i].mfn = extents_start[i];
+
+    ctx->x86_pv.m2p = xc_map_foreign_ranges(
+        xch, DOMID_XEN, m2p_size, PROT_READ,
+        M2P_CHUNK_SIZE, entries, m2p_chunks);
+
+    if ( !ctx->x86_pv.m2p )
+    {
+        PERROR("Failed to mmap m2p ranges");
+        goto err;
+    }
+
+    ctx->x86_pv.nr_m2p_frames = (M2P_CHUNK_SIZE >> PAGE_SHIFT) * m2p_chunks;
+
+#ifdef __i386__
+    /* 32 bit toolstacks automatically get the compat m2p */
+    ctx->x86_pv.compat_m2p_mfn0 = entries[0].mfn;
+#else
+    /* 64 bit toolstacks need to ask Xen specially for it */
+    {
+        struct xen_machphys_mfn_list xmml = {
+            .max_extents = 1,
+            .extent_start = { &ctx->x86_pv.compat_m2p_mfn0 }
+        };
+
+        rc = do_memory_op(xch, XENMEM_machphys_compat_mfn_list,
+                          &xmml, sizeof xmml);
+        if ( rc || xmml.nr_extents != 1 )
+        {
+            PERROR("Failed to get compat mfn list from Xen");
+            rc = -1;
+            goto err;
+        }
+    }
+#endif
+
+    /* All Done */
+    rc = 0;
+    DPRINTF("max_mfn %#lx", ctx->x86_pv.max_mfn);
+
+    /* Reached on success as well: the scratch arrays are always released. */
+err:
+    free(entries);
+    free(extents_start);
+
+    return rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxc/saverestore/common_x86_pv.h b/tools/libxc/saverestore/common_x86_pv.h
new file mode 100644
index 0000000..c7315b6
--- /dev/null
+++ b/tools/libxc/saverestore/common_x86_pv.h
@@ -0,0 +1,104 @@
+#ifndef __COMMON_X86_PV_H
+#define __COMMON_X86_PV_H
+
+#include "common_x86.h"
+
+/*
+ * Convert an mfn to a pfn, given Xens m2p table.
+ *
+ * Caller must ensure that the requested mfn is in range.
+ */
+xen_pfn_t mfn_to_pfn(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Convert a pfn to an mfn, given the guests p2m table.
+ *
+ * Caller must ensure that the requested pfn is in range.
+ */
+xen_pfn_t pfn_to_mfn(struct context *ctx, xen_pfn_t pfn);
+
+/*
+ * Set a mapping in the p2m table.
+ *
+ * Caller must ensure that the requested pfn is in range.
+ */
+void set_p2m(struct context *ctx, xen_pfn_t pfn, xen_pfn_t mfn);
+
+/*
+ * Query whether a particular mfn is valid in the physmap of a guest.
+ */
+bool mfn_in_pseudophysmap(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Debug a particular mfn by walking the p2m and m2p.
+ */
+void pseudophysmap_walk(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Convert a PV cr3 field to an mfn.
+ */
+xen_pfn_t cr3_to_mfn(struct context *ctx, uint64_t cr3);
+
+/*
+ * Convert an mfn to a PV cr3 field.
+ */
+uint64_t mfn_to_cr3(struct context *ctx, xen_pfn_t mfn);
+
+/*
+ * Extract an MFN from a Pagetable Entry.
+ */
+static inline xen_pfn_t pte_to_frame(struct context *ctx, uint64_t pte)
+{
+    /* The frame field ends at bit 52 for 8-byte-wide guests, else bit 44. */
+    const unsigned int field_end = (ctx->x86_pv.width == 8) ? 52 : 44;
+    const uint64_t frame_mask = (1ULL << (field_end - PAGE_SHIFT)) - 1;
+
+    return (pte >> PAGE_SHIFT) & frame_mask;
+}
+
+/*
+ * Replace the frame field of *pte with pfn.  The field spans bits
+ * PAGE_SHIFT up to 52 for 8-byte-wide guests and up to 44 otherwise; all
+ * bits outside of it are preserved.  pfn is ORed in unmasked, so it must
+ * fit within the field.
+ */
+static inline void update_pte(struct context *ctx, uint64_t *pte,
+                              xen_pfn_t pfn)
+{
+    if ( ctx->x86_pv.width == 8 )
+        *pte &= ~(((1ULL << (52 - PAGE_SHIFT)) - 1) << PAGE_SHIFT);
+    else
+        *pte &= ~(((1ULL << (44 - PAGE_SHIFT)) - 1) << PAGE_SHIFT);
+
+    *pte |= (uint64_t)pfn << PAGE_SHIFT;
+}
+
+/*
+ * Get current domain information.
+ *
+ * Fills ctx->x86_pv
+ * - .width
+ * - .levels
+ * - .fpp
+ * - .max_pfn
+ * - .p2m_frames
+ *
+ * Used by the save side to create the X86_PV_INFO record, and by the restore
+ * side to verify the incoming stream.
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+int x86_pv_domain_info(struct context *ctx);
+
+/*
+ * Maps the Xen M2P.
+ *
+ * Fills ctx->x86_pv.
+ * - .max_mfn
+ * - .m2p
+ * - .nr_m2p_frames
+ * - .compat_m2p_mfn0
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+int x86_pv_map_m2p(struct context *ctx);
+
+#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxc/saverestore/restore.c b/tools/libxc/saverestore/restore.c
index 6624baa..5834d38 100644
--- a/tools/libxc/saverestore/restore.c
+++ b/tools/libxc/saverestore/restore.c
@@ -12,6 +12,294 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
     return -1;
 }
 
+/*
+ * Has this pfn already been marked in ctx->restore.populated_pfns?
+ *
+ * A missing bitmap, or a pfn above ctx->restore.max_populated_pfn, counts as
+ * not populated.
+ */
+static bool pfn_is_populated(struct context *ctx, xen_pfn_t pfn)
+{
+    if ( ctx->restore.populated_pfns &&
+         pfn <= ctx->restore.max_populated_pfn )
+        return test_bit(pfn, ctx->restore.populated_pfns);
+
+    return false;
+}
+
+/*
+ * Mark a pfn as populated in ctx->restore.populated_pfns, growing the bitmap
+ * when there is no bitmap yet or pfn lies above
+ * ctx->restore.max_populated_pfn.
+ *
+ * On growth the new maximum is the last pfn of the 1024-aligned chunk
+ * containing pfn, and all newly added bytes are zeroed.
+ *
+ * Returns 0 on success, or -1 if the bitmap could not be enlarged, in which
+ * case the existing bitmap and maximum are left unchanged.
+ */
+static int pfn_set_populated(struct context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+
+    if ( !ctx->restore.populated_pfns || pfn > ctx->restore.max_populated_pfn )
+    {
+        unsigned long new_max_pfn = ((pfn + 1024) & ~1023) - 1;
+        size_t old_sz, new_sz;
+        unsigned long *p;
+
+        /*
+         * With no bitmap yet, realloc() acts as malloc() and hands back
+         * uninitialised memory.  There are no old bytes to preserve, so the
+         * whole allocation must be cleared, not just the part beyond
+         * bitmap_size(max_populated_pfn + 1).
+         */
+        old_sz = ctx->restore.populated_pfns
+            ? bitmap_size(ctx->restore.max_populated_pfn + 1) : 0;
+        new_sz = bitmap_size(new_max_pfn + 1);
+
+        p  = realloc(ctx->restore.populated_pfns, new_sz);
+        if ( !p )
+        {
+            PERROR("Failed to realloc populated bitmap");
+            return -1;
+        }
+
+        /* Newly added pfns start out as not populated. */
+        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);
+
+        ctx->restore.populated_pfns    = p;
+        ctx->restore.max_populated_pfn = new_max_pfn;
+    }
+
+    set_bit(pfn, ctx->restore.populated_pfns);
+
+    return 0;
+}
+
+/*
+ * Populate the guest physmap for a batch of pfns.
+ *
+ * original_pfns[] and types[] must each hold count entries.  Entries whose
+ * type is XEN_DOMCTL_PFINFO_XTAB or XEN_DOMCTL_PFINFO_BROKEN, and pfns which
+ * are already marked as populated, are skipped.  The remainder are passed to
+ * xc_domain_populate_physmap_exact() in a single call; afterwards each one is
+ * marked as populated and handed, together with the matching mfns[] entry as
+ * left by that call, to ctx->ops.set_gfn().
+ *
+ * Returns 0 on success (including when there is nothing to populate) and
+ * non-zero on error.
+ */
+int populate_pfns(struct context *ctx, unsigned count,
+                  const xen_pfn_t *original_pfns, const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = NULL, *pfns = NULL;
+    unsigned i, nr_pfns = 0;
+    int rc = -1;
+
+    /* malloc(0) may return NULL; an empty batch is not an allocation error. */
+    if ( count == 0 )
+        return 0;
+
+    mfns = malloc(count * sizeof *mfns);
+    pfns = malloc(count * sizeof *pfns);
+    if ( !mfns || !pfns )
+    {
+        ERROR("Failed to allocate %zu bytes for populating the physmap",
+              2 * count * sizeof *mfns);
+        goto err;
+    }
+
+    /* Gather the pfns which still need backing memory. */
+    for ( i = 0; i < count; ++i )
+    {
+        if ( types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+             types[i] != XEN_DOMCTL_PFINFO_BROKEN &&
+             !pfn_is_populated(ctx, original_pfns[i]) )
+        {
+            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
+            ++nr_pfns;
+        }
+    }
+
+    if ( nr_pfns )
+    {
+        rc = xc_domain_populate_physmap_exact(xch, ctx->domid, nr_pfns, 0, 0,
+                                              mfns);
+        if ( rc )
+        {
+            PERROR("Failed to populate physmap");
+            goto err;
+        }
+
+        for ( i = 0; i < nr_pfns; ++i )
+        {
+            rc = pfn_set_populated(ctx, pfns[i]);
+            if ( rc )
+                goto err;
+            ctx->ops.set_gfn(ctx, pfns[i], mfns[i]);
+        }
+    }
+
+    rc = 0;
+
+ err:
+    free(pfns);
+    free(mfns);
+
+    return rc;
+}
+
+/*
+ * Write the page contents carried by a PAGE_DATA record into the guest.
+ *
+ * pfns[] and types[] each hold count entries.  page_data is consumed one
+ * PAGE_SIZE block at a time, in order, for every entry whose type is not
+ * XTAB, BROKEN or XALLOC.
+ *
+ * populate_pfns() is called for the batch and ctx->ops.set_page_type() for
+ * every entry.  Pages of the NOTAB and L1-L4 (optionally pinned) types are
+ * then mapped in a single xc_map_foreign_bulk() call, filled from page_data
+ * and passed to ctx->ops.localise_page().
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+static int process_page_data(struct context *ctx, unsigned count,
+                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof *mfns);
+    int *map_errs = malloc(count * sizeof *map_errs);
+    int rc = -1;
+    void *mapping = NULL, *guest_page = NULL;
+    unsigned i,    /* i indexes the pfns from the record */
+        j,         /* j indexes the subset of pfns we decide to map */
+        nr_pages = 0;
+
+    if ( !mfns || !map_errs )
+    {
+        ERROR("Failed to allocate %zu bytes to process page data",
+              count * (sizeof *mfns + sizeof *map_errs));
+        goto err;
+    }
+
+    rc = populate_pfns(ctx, count, pfns, types);
+    if ( rc )
+    {
+        ERROR("Failed to populate pfns for batch of %u pages", count);
+        goto err;
+    }
+    rc = -1;
+
+    /* Record each page's type and collect the frames which carry data. */
+    for ( i = 0, nr_pages = 0; i < count; ++i )
+    {
+        ctx->ops.set_page_type(ctx, pfns[i], types[i]);
+
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_NOTAB:
+
+        case XEN_DOMCTL_PFINFO_L1TAB:
+        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L4TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+            mfns[nr_pages++] = ctx->ops.pfn_to_gfn(ctx, pfns[i]);
+            break;
+        }
+
+    }
+
+    if ( nr_pages > 0 )
+    {
+        mapping = guest_page = xc_map_foreign_bulk(
+            xch, ctx->domid, PROT_READ | PROT_WRITE,
+            mfns, map_errs, nr_pages);
+        if ( !mapping )
+        {
+            PERROR("Unable to map %u mfns for %u pages of data",
+                   nr_pages, count);
+            goto err;
+        }
+    }
+
+    for ( i = 0, j = 0; i < count; ++i )
+    {
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_XTAB:
+        case XEN_DOMCTL_PFINFO_BROKEN:
+            /* Nothing at all to do */
+            /* fallthrough */
+        case XEN_DOMCTL_PFINFO_XALLOC:
+            /* Nothing further to do */
+            continue;
+        }
+
+        if ( map_errs[j] )
+        {
+            /*
+             * rc still holds 0 from the previous iteration's successful
+             * localise_page(); reset it so this failure is reported.
+             */
+            rc = -1;
+            ERROR("Mapping pfn %lx (mfn %lx, type %#"PRIx32")failed with %d",
+                  pfns[i], mfns[j], types[i], map_errs[j]);
+            goto err;
+        }
+
+        memcpy(guest_page, page_data, PAGE_SIZE);
+
+        /* Undo page normalisation done by the saver. */
+        rc = ctx->ops.localise_page(ctx, types[i], guest_page);
+        if ( rc )
+        {
+            DPRINTF("Failed to localise");
+            goto err;
+        }
+
+        ++j;
+        guest_page += PAGE_SIZE;
+        page_data += PAGE_SIZE;
+    }
+
+    rc = 0;
+
+ err:
+    if ( mapping )
+        munmap(mapping, nr_pages * PAGE_SIZE);
+
+    free(map_errs);
+    free(mfns);
+
+    return rc;
+}
+
+/*
+ * Validate a PAGE_DATA record and pass its contents to process_page_data().
+ *
+ * Each pages->pfn[] entry carries a pfn (under PAGE_DATA_PFN_MASK) and a page
+ * type (under PAGE_DATA_TYPE_MASK, shifted down by 32).  The record is
+ * rejected if it is too short for its header and pfn array, if any pfn fails
+ * ctx->ops.pfn_is_valid(), if any type falls in the rejected range, or if the
+ * record length is not exactly the header plus the pfn array plus one page
+ * of data per entry whose type is below XEN_DOMCTL_PFINFO_BROKEN.
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+int handle_page_data(struct context *ctx, struct record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct rec_page_data_header *pages = rec->data;
+    unsigned i, pages_of_data = 0;
+    int rc = -1;
+
+    xen_pfn_t *pfns = NULL, pfn;
+    uint32_t *types = NULL, type;
+
+    if ( rec->length < sizeof *pages )
+    {
+        ERROR("PAGE_DATA record trucated: length %"PRIu32", min %zu",
+              rec->length, sizeof *pages);
+        goto err;
+    }
+    else if ( pages->count < 1 )
+    {
+        ERROR("Expected at least 1 pfn in PAGE_DATA record");
+        goto err;
+    }
+    else if ( rec->length <
+              sizeof *pages + (pages->count * sizeof (uint64_t)) )
+    {
+        ERROR("PAGE_DATA record (length %"PRIu32") too short to contain %"
+              PRIu32" pfns worth of information", rec->length, pages->count);
+        goto err;
+    }
+
+    pfns = malloc(pages->count * sizeof *pfns);
+    types = malloc(pages->count * sizeof *types);
+    if ( !pfns || !types )
+    {
+        ERROR("Unable to allocate enough memory for %"PRIu32" pfns",
+              pages->count);
+        goto err;
+    }
+
+    /* Split each entry into pfn and type, counting the pages of data. */
+    for ( i = 0; i < pages->count; ++i )
+    {
+        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
+        if ( !ctx->ops.pfn_is_valid(ctx, pfn) )
+        {
+            ERROR("pfn %#lx (index %u) outside domain maximum", pfn, i);
+            goto err;
+        }
+
+        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
+        if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
+             ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
+        {
+            /* type is a uint32_t, so it needs PRIx32 rather than %lx. */
+            ERROR("Invalid type %#"PRIx32" for pfn %#lx (index %u)",
+                  type, pfn, i);
+            goto err;
+        }
+        else if ( type < XEN_DOMCTL_PFINFO_BROKEN )
+            /* NOTAB and all L1 thru L4 tables (including pinned) should have
+             * a page worth of data in the record. */
+            pages_of_data++;
+
+        pfns[i] = pfn;
+        types[i] = type;
+    }
+
+    if ( rec->length != (sizeof *pages +
+                         (sizeof (uint64_t) * pages->count) +
+                         (PAGE_SIZE * pages_of_data)) )
+    {
+        ERROR("PAGE_DATA record wrong size: length %"PRIu32", expected "
+              "%zu + %zu + %zu", rec->length, sizeof *pages,
+              (sizeof (uint64_t) * pages->count),
+              ((size_t)PAGE_SIZE * pages_of_data));
+        goto err;
+    }
+
+    /* The page contents follow immediately after the pfn array. */
+    rc = process_page_data(ctx, pages->count, pfns, types,
+                           &pages->pfn[pages->count]);
+ err:
+    free(types);
+    free(pfns);
+
+    return rc;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxc/saverestore/save.c b/tools/libxc/saverestore/save.c
index c013e62..e842e6c 100644
--- a/tools/libxc/saverestore/save.c
+++ b/tools/libxc/saverestore/save.c
@@ -1,5 +1,47 @@
+#include <arpa/inet.h>
+
 #include "common.h"
 
+/*
+ * Write the Image Header followed by the Domain Header to ctx->fd.
+ *
+ * The Domain Header records guest_type, a page shift of 12 and the Xen
+ * major/minor version taken from xc_version(XENVER_version).
+ *
+ * Returns 0 on success, or -1 if the Xen version cannot be obtained or
+ * either header cannot be written.
+ */
+int write_headers(struct context *ctx, uint16_t guest_type)
+{
+    xc_interface *xch = ctx->xch;
+    struct ihdr ihdr =
+        {
+            .marker  = IHDR_MARKER,
+            .id      = htonl(IHDR_ID),
+            .version = htonl(IHDR_VERSION),
+            .options = htons(IHDR_OPT_LITTLE_ENDIAN),
+        };
+    struct dhdr dhdr =
+        {
+            .type       = guest_type,
+            .page_shift = 12,
+        };
+    int32_t xen_version = xc_version(xch, XENVER_version, NULL);
+
+    if ( xen_version < 0 )
+    {
+        PERROR("Unable to obtain Xen Version");
+        return -1;
+    }
+
+    /* Major version lives in the upper 16 bits, minor in the lower 16. */
+    dhdr.xen_major = (xen_version >> 16) & 0xffff;
+    dhdr.xen_minor = (xen_version)       & 0xffff;
+
+    if ( write_exact(ctx->fd, &ihdr, sizeof ihdr) )
+    {
+        PERROR("Unable to write Image Header to stream");
+        return -1;
+    }
+
+    if ( write_exact(ctx->fd, &dhdr, sizeof dhdr) )
+    {
+        PERROR("Unable to write Domain Header to stream");
+        return -1;
+    }
+
+    return 0;
+}
+
 int xc_domain_save2(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
                     uint32_t max_factor, uint32_t flags,
                     struct save_callbacks* callbacks, int hvm,
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.