[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1/6] tools/libxc: Remove legacy migration implementation



It is no longer used.

One complication is that xc_map_m2p() has users in xc_offline_page.c,
xen-mfndump and xen-mceinj.  Move its implementation into
xc_offline_page (for want of a better location) beside its current
user.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CC: Ian Campbell <Ian.Campbell@xxxxxxxxxx>
CC: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
CC: Wei Liu <wei.liu2@xxxxxxxxxx>

---

Going forwards, xc_map_m2p() should move into libxc (being host
specific rather than guest specific) and gain a slightly more rational
API to consolidate several open-coded instances throughout tools/.
However, that is very much 4.7 work, given the current timescale on 4.6
---
 tools/libxc/Makefile            |    1 -
 tools/libxc/xc_domain_restore.c | 2411 ---------------------------------------
 tools/libxc/xc_domain_save.c    | 2198 -----------------------------------
 tools/libxc/xc_offline_page.c   |   59 +
 tools/libxc/xg_save_restore.h   |  247 ----
 5 files changed, 59 insertions(+), 4857 deletions(-)
 delete mode 100644 tools/libxc/xc_domain_restore.c
 delete mode 100644 tools/libxc/xc_domain_save.c

diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index 1aec848..a026c4e 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -53,7 +53,6 @@ CTRL_SRCS-$(CONFIG_MiniOS) += xc_minios.c
 GUEST_SRCS-y :=
 GUEST_SRCS-y += xg_private.c xc_suspend.c
 ifeq ($(CONFIG_MIGRATE),y)
-GUEST_SRCS-y += xc_domain_restore.c xc_domain_save.c
 GUEST_SRCS-y += xc_sr_common.c
 GUEST_SRCS-$(CONFIG_X86) += xc_sr_common_x86.c
 GUEST_SRCS-$(CONFIG_X86) += xc_sr_common_x86_pv.c
diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
deleted file mode 100644
index 8435f6b..0000000
--- a/tools/libxc/xc_domain_restore.c
+++ /dev/null
@@ -1,2411 +0,0 @@
-/******************************************************************************
- * xc_domain_restore.c
- *
- * Restore the state of a guest session.
- *
- * Copyright (c) 2003, K A Fraser.
- * Copyright (c) 2006, Intel Corporation
- * Copyright (c) 2007, XenSource Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  
USA
- *
- */
-
-/*
- * The superpages flag in restore has two different meanings depending on
- * the type of domain.
- *
- * For an HVM domain, the flag means to look for properly aligned contiguous
- * pages and try to allocate a superpage to satisfy it.  If that fails,
- * fall back to small pages.
- *
- * For a PV domain, the flag means allocate all memory as superpages.  If that
- * fails, the restore fails.  This behavior is required for PV guests who
- * want to use superpages.
- */
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <inttypes.h>
-
-#include "xg_private.h"
-#include "xg_save_restore.h"
-#include "xc_dom.h"
-
-#include <xen/hvm/ioreq.h>
-#include <xen/hvm/params.h>
-
-struct restore_ctx {
-    unsigned long max_mfn; /* max mfn of the current host machine */
-    unsigned long hvirt_start; /* virtual starting address of the hypervisor */
-    unsigned int pt_levels; /* #levels of page tables used by the current 
guest */
-    unsigned long nr_pfns; /* number of 'in use' pfns in the guest (i.e. #P2M 
entries with a valid mfn) */
-    xen_pfn_t *live_p2m; /* Live mapping of the table mapping each PFN to its 
current MFN. */
-    xen_pfn_t *p2m; /* A table mapping each PFN to its new MFN. */
-    xen_pfn_t *p2m_batch; /* A table of P2M mappings in the current region.  */
-    xen_pfn_t *p2m_saved_batch; /* Copy of p2m_batch array for pv superpage 
alloc */
-    int superpages; /* Superpage allocation has been requested */
-    int hvm;    /* This is an hvm domain */
-    int completed; /* Set when a consistent image is available */
-    int last_checkpoint; /* Set when we should commit to the current 
checkpoint when it completes. */
-    int compressing; /* Set when sender signals that pages would be sent 
compressed (for Remus) */
-    struct domain_info_context dinfo;
-};
-
-#define HEARTBEAT_MS 1000
-
-#ifndef __MINIOS__
-static ssize_t rdexact(xc_interface *xch, struct restore_ctx *ctx,
-                       int fd, void* buf, size_t size)
-{
-    size_t offset = 0;
-    ssize_t len;
-    struct timeval tv;
-    fd_set rfds;
-
-    while ( offset < size )
-    {
-        if ( ctx->completed ) {
-            /* expect a heartbeat every HEARBEAT_MS ms maximum */
-            tv.tv_sec = HEARTBEAT_MS / 1000;
-            tv.tv_usec = (HEARTBEAT_MS % 1000) * 1000;
-
-            FD_ZERO(&rfds);
-            FD_SET(fd, &rfds);
-            len = select(fd + 1, &rfds, NULL, NULL, &tv);
-            if ( len == -1 && errno == EINTR )
-                continue;
-            if ( !FD_ISSET(fd, &rfds) ) {
-                ERROR("%s failed (select returned %zd)", __func__, len);
-                errno = ETIMEDOUT;
-                return -1;
-            }
-        }
-
-        len = read(fd, buf + offset, size - offset);
-        if ( (len == -1) && ((errno == EINTR) || (errno == EAGAIN)) )
-            continue;
-        if ( len == 0 ) {
-            ERROR("0-length read");
-            errno = 0;
-        }
-        if ( len <= 0 ) {
-            ERROR("%s failed (read rc: %zd, errno: %d)", __func__, len, errno);
-            return -1;
-        }
-        offset += len;
-    }
-
-    return 0;
-}
-
-#define RDEXACT(fd,buf,size) rdexact(xch, ctx, fd, buf, size)
-#else
-#define RDEXACT read_exact
-#endif
-
-#define SUPERPAGE_PFN_SHIFT  9
-#define SUPERPAGE_NR_PFNS    (1UL << SUPERPAGE_PFN_SHIFT)
-#define SUPERPAGE(_pfn) ((_pfn) & (~(SUPERPAGE_NR_PFNS-1)))
-#define SUPER_PAGE_START(pfn)    (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 )
-
-/*
-** When we're restoring into a pv superpage-allocated guest, we take
-** a copy of the p2m_batch array to preserve the pfn, then allocate the
-** corresponding superpages.  We then fill in the p2m array using the saved
-** pfns.
-*/
-static int alloc_superpage_mfns(
-    xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, int nr_mfns)
-{
-    int i, j, max = 0;
-    unsigned long pfn, base_pfn, mfn;
-
-    for (i = 0; i < nr_mfns; i++)
-    {
-        pfn = ctx->p2m_batch[i];
-        base_pfn = SUPERPAGE(pfn);
-        if (ctx->p2m[base_pfn] != (INVALID_P2M_ENTRY-2))
-        {
-            ctx->p2m_saved_batch[max] = base_pfn;
-            ctx->p2m_batch[max] = base_pfn;
-            max++;
-            ctx->p2m[base_pfn] = INVALID_P2M_ENTRY-2;
-        }
-    }
-    if (xc_domain_populate_physmap_exact(xch, dom, max, SUPERPAGE_PFN_SHIFT,
-                                         0, ctx->p2m_batch) != 0)
-        return 1;
-
-    for (i = 0; i < max; i++)
-    {
-        mfn = ctx->p2m_batch[i];
-        pfn = ctx->p2m_saved_batch[i];
-        for (j = 0; j < SUPERPAGE_NR_PFNS; j++)
-            ctx->p2m[pfn++] = mfn++;
-    }
-    return 0;
-}
-/*
-** In the state file (or during transfer), all page-table pages are
-** converted into a 'canonical' form where references to actual mfns
-** are replaced with references to the corresponding pfns.
-** This function inverts that operation, replacing the pfn values with
-** the (now known) appropriate mfn values.
-*/
-static int uncanonicalize_pagetable(
-    xc_interface *xch, uint32_t dom, struct restore_ctx *ctx, void *page)
-{
-    int i, rc, pte_last, nr_mfns = 0;
-    unsigned long pfn;
-    uint64_t pte;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    pte_last = PAGE_SIZE / 8;
-
-    /* First pass: work out how many (if any) MFNs we need to alloc */
-    for ( i = 0; i < pte_last; i++ )
-    {
-        pte = ((uint64_t *)page)[i];
-
-        /* XXX SMH: below needs fixing for PROT_NONE etc */
-        if ( !(pte & _PAGE_PRESENT) )
-            continue;
-        
-        pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
-        
-        if ( pfn >= dinfo->p2m_size )
-        {
-            /* This "page table page" is probably not one; bail. */
-            ERROR("Frame number in page table is out of range: "
-                  "i=%d pfn=0x%lx p2m_size=%lu",
-                  i, pfn, dinfo->p2m_size);
-            return 0;
-        }
-        
-        if ( ctx->p2m[pfn] == INVALID_P2M_ENTRY )
-        {
-            /* Have a 'valid' PFN without a matching MFN - need to alloc */
-            ctx->p2m_batch[nr_mfns++] = pfn; 
-            ctx->p2m[pfn]--;
-        }
-    }
-
-    /* Allocate the requisite number of mfns. */
-    if (nr_mfns)
-    {
-        if (!ctx->hvm && ctx->superpages)
-            rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
-        else
-            rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
-                                                  ctx->p2m_batch);
-
-        if (rc)
-        {
-            ERROR("Failed to allocate memory for batch.!\n");
-            errno = ENOMEM;
-            return 0;
-        }
-    }
-    
-    /* Second pass: uncanonicalize each present PTE */
-    nr_mfns = 0;
-    for ( i = 0; i < pte_last; i++ )
-    {
-        pte = ((uint64_t *)page)[i];
-        
-        /* XXX SMH: below needs fixing for PROT_NONE etc */
-        if ( !(pte & _PAGE_PRESENT) )
-            continue;
-        
-        pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
-
-        if ( ctx->p2m[pfn] == (INVALID_P2M_ENTRY-1) )
-            ctx->p2m[pfn] = ctx->p2m_batch[nr_mfns++];
-
-        pte &= ~MADDR_MASK_X86;
-        pte |= (uint64_t)ctx->p2m[pfn] << PAGE_SHIFT;
-
-        ((uint64_t *)page)[i] = (uint64_t)pte;
-    }
-
-    return 1;
-}
-
-
-/* Load the p2m frame list, plus potential extended info chunk */
-static xen_pfn_t *load_p2m_frame_list(
-    xc_interface *xch, struct restore_ctx *ctx,
-    int io_fd, int *pae_extended_cr3, int *ext_vcpucontext,
-    uint32_t *vcpuextstate_size)
-{
-    xen_pfn_t *p2m_frame_list;
-    vcpu_guest_context_any_t ctxt;
-    xen_pfn_t p2m_fl_zero;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    /* Read first entry of P2M list, or extended-info signature (~0UL). */
-    if ( RDEXACT(io_fd, &p2m_fl_zero, sizeof(long)) )
-    {
-        PERROR("read extended-info signature failed");
-        return NULL;
-    }
-    
-    if ( p2m_fl_zero == ~0UL )
-    {
-        uint32_t tot_bytes;
-        
-        /* Next 4 bytes: total size of following extended info. */
-        if ( RDEXACT(io_fd, &tot_bytes, sizeof(tot_bytes)) )
-        {
-            PERROR("read extended-info size failed");
-            return NULL;
-        }
-        
-        while ( tot_bytes )
-        {
-            uint32_t chunk_bytes;
-            char     chunk_sig[4];
-            
-            /* 4-character chunk signature + 4-byte remaining chunk size. */
-            if ( RDEXACT(io_fd, chunk_sig, sizeof(chunk_sig)) ||
-                 RDEXACT(io_fd, &chunk_bytes, sizeof(chunk_bytes)) ||
-                 (tot_bytes < (chunk_bytes + 8)) )
-            {
-                PERROR("read extended-info chunk signature failed");
-                return NULL;
-            }
-            tot_bytes -= 8;
-
-            /* VCPU context structure? */
-            if ( !strncmp(chunk_sig, "vcpu", 4) )
-            {
-                /* Pick a guest word-size and PT depth from the ctxt size */
-                if ( chunk_bytes == sizeof (ctxt.x32) )
-                {
-                    dinfo->guest_width = 4;
-                    ctx->pt_levels = 3;
-                }
-                else if ( chunk_bytes == sizeof (ctxt.x64) )
-                {
-                    dinfo->guest_width = 8;
-                    ctx->pt_levels = 4;
-                }
-                else 
-                {
-                    ERROR("bad extended-info context size %d", chunk_bytes);
-                    return NULL;
-                }
-
-                if ( RDEXACT(io_fd, &ctxt, chunk_bytes) )
-                {
-                    PERROR("read extended-info vcpu context failed");
-                    return NULL;
-                }
-                tot_bytes -= chunk_bytes;
-                chunk_bytes = 0;
-
-                if ( GET_FIELD(&ctxt, vm_assist, dinfo->guest_width)
-                     & (1UL << VMASST_TYPE_pae_extended_cr3) )
-                    *pae_extended_cr3 = 1;
-            }
-            else if ( !strncmp(chunk_sig, "extv", 4) )
-            {
-                *ext_vcpucontext = 1;
-            }
-            else if ( !strncmp(chunk_sig, "xcnt", 4) )
-            {
-                if ( RDEXACT(io_fd, vcpuextstate_size, 
sizeof(*vcpuextstate_size)) )
-                {
-                    PERROR("read extended vcpu state size failed");
-                    return NULL;
-                }
-                tot_bytes -= chunk_bytes;
-                chunk_bytes = 0;
-            }
-            
-            /* Any remaining bytes of this chunk: read and discard. */
-            while ( chunk_bytes )
-            {
-                unsigned long sz = min_t(unsigned long, chunk_bytes, 
sizeof(xen_pfn_t));
-                if ( RDEXACT(io_fd, &p2m_fl_zero, sz) )
-                {
-                    PERROR("read-and-discard extended-info chunk bytes 
failed");
-                    return NULL;
-                }
-                chunk_bytes -= sz;
-                tot_bytes   -= sz;
-            }
-        }
-
-        /* Now read the real first entry of P2M list. */
-        if ( RDEXACT(io_fd, &p2m_fl_zero, sizeof(xen_pfn_t)) )
-        {
-            PERROR("read first entry of p2m_frame_list failed");
-            return NULL;
-        }
-    }
-
-    /* Now that we know the guest's word-size, can safely allocate 
-     * the p2m frame list */
-    if ( (p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) == NULL )
-    {
-        ERROR("Couldn't allocate p2m_frame_list array");
-        return NULL;
-    }
-
-    /* First entry has already been read. */
-    p2m_frame_list[0] = p2m_fl_zero;
-    if ( RDEXACT(io_fd, &p2m_frame_list[1], 
-                 (P2M_FL_ENTRIES - 1) * sizeof(xen_pfn_t)) )
-    {
-        PERROR("read p2m_frame_list failed");
-        free(p2m_frame_list);
-        return NULL;
-    }
-    
-    return p2m_frame_list;
-}
-
-typedef struct {
-    int ishvm;
-    union {
-        struct tailbuf_pv {
-            unsigned int pfncount;
-            unsigned long* pfntab;
-            unsigned int vcpucount;
-            unsigned char* vcpubuf;
-            unsigned char shared_info_page[PAGE_SIZE];
-        } pv;
-        struct tailbuf_hvm {
-            uint64_t magicpfns[3];
-            uint32_t hvmbufsize, reclen;
-            uint8_t* hvmbuf;
-            struct {
-                uint32_t magic;
-                uint32_t version;
-                uint64_t len;
-            } qemuhdr;
-            uint32_t qemubufsize;
-            uint8_t* qemubuf;
-        } hvm;
-    } u;
-} tailbuf_t;
-
-/* read stream until EOF, growing buffer as necssary */
-static int compat_buffer_qemu(xc_interface *xch, struct restore_ctx *ctx,
-                              int fd, struct tailbuf_hvm *buf)
-{
-    uint8_t *qbuf, *tmp;
-    int blen = 0, dlen = 0;
-    int rc;
-
-    /* currently save records tend to be about 7K */
-    blen = 8192;
-    if ( !(qbuf = malloc(blen)) ) {
-        ERROR("Error allocating QEMU buffer");
-        return -1;
-    }
-
-    while( (rc = read(fd, qbuf+dlen, blen-dlen)) > 0 ) {
-        DPRINTF("Read %d bytes of QEMU data\n", rc);
-        dlen += rc;
-
-        if (dlen == blen) {
-            DPRINTF("%d-byte QEMU buffer full, reallocating...\n", dlen);
-            blen += 4096;
-            tmp = realloc(qbuf, blen);
-            if ( !tmp ) {
-                ERROR("Error growing QEMU buffer to %d bytes", blen);
-                free(qbuf);
-                return -1;
-            }
-            qbuf = tmp;
-        }
-    }
-
-    if ( rc < 0 ) {
-        ERROR("Error reading QEMU data");
-        free(qbuf);
-        return -1;
-    }
-
-    if ( memcmp(qbuf, "QEVM", 4) ) {
-        ERROR("Invalid QEMU magic: 0x%08"PRIx32, *(uint32_t*)qbuf);
-        free(qbuf);
-        return -1;
-    }
-
-    buf->qemubuf = qbuf;
-    buf->qemubufsize = dlen;
-
-    return 0;
-}
-
-static int buffer_qemu(xc_interface *xch, struct restore_ctx *ctx,
-                       int fd, struct tailbuf_hvm *buf)
-{
-    uint32_t qlen;
-    uint8_t *tmp;
-
-    if ( RDEXACT(fd, &qlen, sizeof(qlen)) ) {
-        PERROR("Error reading QEMU header length");
-        return -1;
-    }
-
-    if ( qlen > buf->qemubufsize ) {
-        if ( buf->qemubuf) {
-            tmp = realloc(buf->qemubuf, qlen);
-            if ( tmp )
-                buf->qemubuf = tmp;
-            else {
-                ERROR("Error reallocating QEMU state buffer");
-                return -1;
-            }
-        } else {
-            buf->qemubuf = malloc(qlen);
-            if ( !buf->qemubuf ) {
-                ERROR("Error allocating QEMU state buffer");
-                return -1;
-            }
-        }
-    }
-    buf->qemubufsize = qlen;
-
-    if ( RDEXACT(fd, buf->qemubuf, buf->qemubufsize) ) {
-        PERROR("Error reading QEMU state");
-        return -1;
-    }
-
-    return 0;
-}
-
-static int dump_qemu(xc_interface *xch, uint32_t dom, struct tailbuf_hvm *buf)
-{
-    int saved_errno;
-    char path[256];
-    FILE *fp;
-
-    sprintf(path, XC_DEVICE_MODEL_RESTORE_FILE".%u", dom);
-    fp = fopen(path, "wb");
-    if ( !fp )
-        return -1;
-
-    DPRINTF("Writing %d bytes of QEMU data\n", buf->qemubufsize);
-    if ( fwrite(buf->qemubuf, 1, buf->qemubufsize, fp) != buf->qemubufsize) {
-        saved_errno = errno;
-        fclose(fp);
-        errno = saved_errno;
-        return -1;
-    }
-
-    fclose(fp);
-
-    return 0;
-}
-
-static int buffer_tail_hvm(xc_interface *xch, struct restore_ctx *ctx,
-                           struct tailbuf_hvm *buf, int fd,
-                           unsigned int max_vcpu_id, uint64_t *vcpumap,
-                           int ext_vcpucontext,
-                           uint32_t vcpuextstate_size)
-{
-    uint8_t *tmp;
-    unsigned char qemusig[21];
-
-    if ( RDEXACT(fd, buf->magicpfns, sizeof(buf->magicpfns)) ) {
-        PERROR("Error reading magic PFNs");
-        return -1;
-    }
-
-    if ( RDEXACT(fd, &buf->reclen, sizeof(buf->reclen)) ) {
-        PERROR("Error reading HVM params size");
-        return -1;
-    }
-
-    if ( buf->reclen > buf->hvmbufsize ) {
-        if ( buf->hvmbuf) {
-            tmp = realloc(buf->hvmbuf, buf->reclen);
-            if ( tmp ) {
-                buf->hvmbuf = tmp;
-                buf->hvmbufsize = buf->reclen;
-            } else {
-                ERROR("Error reallocating HVM param buffer");
-                return -1;
-            }
-        } else {
-            buf->hvmbuf = malloc(buf->reclen);
-            if ( !buf->hvmbuf ) {
-                ERROR("Error allocating HVM param buffer");
-                return -1;
-            }
-            buf->hvmbufsize = buf->reclen;
-        }
-    }
-
-    if ( RDEXACT(fd, buf->hvmbuf, buf->reclen) ) {
-        PERROR("Error reading HVM params");
-        return -1;
-    }
-
-    if ( RDEXACT(fd, qemusig, sizeof(qemusig)) ) {
-        PERROR("Error reading QEMU signature");
-        return -1;
-    }
-
-    /* The legacy live-migration QEMU record has no length information.
-     * Short of reimplementing the QEMU parser, we're forced to just read
-     * until EOF.
-     *
-     * Gets around this by sending a different signatures for the new
-     * live-migration QEMU record and Remus which includes a length
-     * prefix
-     */
-    if ( !memcmp(qemusig, "QemuDeviceModelRecord", sizeof(qemusig)) )
-        return compat_buffer_qemu(xch, ctx, fd, buf);
-    else if ( !memcmp(qemusig, "DeviceModelRecord0002", sizeof(qemusig)) ||
-              !memcmp(qemusig, "RemusDeviceModelState", sizeof(qemusig)) )
-        return buffer_qemu(xch, ctx, fd, buf);
-
-    qemusig[20] = '\0';
-    ERROR("Invalid QEMU signature: %s", qemusig);
-    return -1;
-}
-
-static int buffer_tail_pv(xc_interface *xch, struct restore_ctx *ctx,
-                          struct tailbuf_pv *buf, int fd,
-                          unsigned int max_vcpu_id, uint64_t *vcpumap,
-                          int ext_vcpucontext,
-                          uint32_t vcpuextstate_size)
-{
-    unsigned int i;
-    size_t pfnlen, vcpulen;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    /* TODO: handle changing pfntab and vcpu counts */
-    /* PFN tab */
-    if ( RDEXACT(fd, &buf->pfncount, sizeof(buf->pfncount)) ||
-         (buf->pfncount > (1U << 28)) ) /* up to 1TB of address space */
-    {
-        PERROR("Error when reading pfn count");
-        return -1;
-    }
-    pfnlen = sizeof(unsigned long) * buf->pfncount;
-    if ( !(buf->pfntab) ) {
-        if ( !(buf->pfntab = malloc(pfnlen)) ) {
-            ERROR("Error allocating PFN tail buffer");
-            return -1;
-        }
-    }
-    // DPRINTF("Reading PFN tab: %d bytes\n", pfnlen);
-    if ( RDEXACT(fd, buf->pfntab, pfnlen) ) {
-        PERROR("Error when reading pfntab");
-        goto free_pfntab;
-    }
-
-    /* VCPU contexts */
-    buf->vcpucount = 0;
-    for (i = 0; i <= max_vcpu_id; i++) {
-        // DPRINTF("vcpumap: %llx, cpu: %d, bit: %llu\n", vcpumap[i/64], i, 
(vcpumap[i/64] & (1ULL << (i%64))));
-        if ( (!(vcpumap[i/64] & (1ULL << (i%64)))) )
-            continue;
-        buf->vcpucount++;
-    }
-    // DPRINTF("VCPU count: %d\n", buf->vcpucount);
-    vcpulen = ((dinfo->guest_width == 8) ? sizeof(vcpu_guest_context_x86_64_t)
-               : sizeof(vcpu_guest_context_x86_32_t)) * buf->vcpucount;
-    if ( ext_vcpucontext )
-        vcpulen += 128 * buf->vcpucount;
-    vcpulen += vcpuextstate_size * buf->vcpucount;
-
-    if ( !(buf->vcpubuf) ) {
-        if ( !(buf->vcpubuf = malloc(vcpulen)) ) {
-            ERROR("Error allocating VCPU ctxt tail buffer");
-            goto free_pfntab;
-        }
-    }
-    // DPRINTF("Reading VCPUS: %d bytes\n", vcpulen);
-    if ( RDEXACT(fd, buf->vcpubuf, vcpulen) ) {
-        PERROR("Error when reading ctxt");
-        goto free_vcpus;
-    }
-
-    /* load shared_info_page */
-    // DPRINTF("Reading shared info: %lu bytes\n", PAGE_SIZE);
-    if ( RDEXACT(fd, buf->shared_info_page, PAGE_SIZE) ) {
-        PERROR("Error when reading shared info page");
-        goto free_vcpus;
-    }
-
-    return 0;
-
-  free_vcpus:
-    if (buf->vcpubuf) {
-        free (buf->vcpubuf);
-        buf->vcpubuf = NULL;
-    }
-  free_pfntab:
-    if (buf->pfntab) {
-        free (buf->pfntab);
-        buf->pfntab = NULL;
-    }
-
-    return -1;
-}
-
-static int buffer_tail(xc_interface *xch, struct restore_ctx *ctx,
-                       tailbuf_t *buf, int fd, unsigned int max_vcpu_id,
-                       uint64_t *vcpumap, int ext_vcpucontext,
-                       uint32_t vcpuextstate_size)
-{
-    if ( buf->ishvm )
-        return buffer_tail_hvm(xch, ctx, &buf->u.hvm, fd, max_vcpu_id, vcpumap,
-                               ext_vcpucontext, vcpuextstate_size);
-    else
-        return buffer_tail_pv(xch, ctx, &buf->u.pv, fd, max_vcpu_id, vcpumap,
-                              ext_vcpucontext, vcpuextstate_size);
-}
-
-static void tailbuf_free_hvm(struct tailbuf_hvm *buf)
-{
-    if ( buf->hvmbuf ) {
-        free(buf->hvmbuf);
-        buf->hvmbuf = NULL;
-    }
-    if ( buf->qemubuf ) {
-        free(buf->qemubuf);
-        buf->qemubuf = NULL;
-    }
-}
-
-static void tailbuf_free_pv(struct tailbuf_pv *buf)
-{
-    if ( buf->vcpubuf ) {
-        free(buf->vcpubuf);
-        buf->vcpubuf = NULL;
-    }
-    if ( buf->pfntab ) {
-        free(buf->pfntab);
-        buf->pfntab = NULL;
-    }
-}
-
-static void tailbuf_free(tailbuf_t *buf)
-{
-    if ( buf->ishvm )
-        tailbuf_free_hvm(&buf->u.hvm);
-    else
-        tailbuf_free_pv(&buf->u.pv);
-}
-
-struct toolstack_data_t {
-    uint8_t *data;
-    uint32_t len;
-};
-
-typedef struct {
-    void* pages;
-    /* pages is of length nr_physpages, pfn_types is of length nr_pages */
-    unsigned int nr_physpages, nr_pages;
-
-    /* checkpoint compression state */
-    int compressing;
-    unsigned long compbuf_pos, compbuf_size;
-
-    /* Types of the pfns in the current region */
-    unsigned long* pfn_types;
-
-    int verify;
-
-    int new_ctxt_format;
-    int max_vcpu_id;
-    uint64_t vcpumap[XC_SR_MAX_VCPUS/64];
-    uint64_t identpt;
-    uint64_t paging_ring_pfn;
-    uint64_t monitor_ring_pfn;
-    uint64_t sharing_ring_pfn;
-    uint64_t vm86_tss;
-    uint64_t console_pfn;
-    uint64_t acpi_ioport_location;
-    uint64_t viridian;
-    uint64_t vm_generationid_addr;
-    uint64_t ioreq_server_pfn;
-    uint64_t nr_ioreq_server_pages;
-
-    struct toolstack_data_t tdata;
-} pagebuf_t;
-
-static int pagebuf_init(pagebuf_t* buf)
-{
-    memset(buf, 0, sizeof(*buf));
-    return 0;
-}
-
-static void pagebuf_free(pagebuf_t* buf)
-{
-    if (buf->tdata.data != NULL) {
-        free(buf->tdata.data);
-        buf->tdata.data = NULL;
-    }
-    if (buf->pages) {
-        free(buf->pages);
-        buf->pages = NULL;
-    }
-    if(buf->pfn_types) {
-        free(buf->pfn_types);
-        buf->pfn_types = NULL;
-    }
-}
-
-static int pagebuf_get_one(xc_interface *xch, struct restore_ctx *ctx,
-                           pagebuf_t* buf, int fd, uint32_t dom)
-{
-    int count, countpages, oldcount, i;
-    void* ptmp;
-    unsigned long compbuf_size;
-
-    if ( RDEXACT(fd, &count, sizeof(count)) )
-    {
-        PERROR("Error when reading batch size");
-        return -1;
-    }
-
-    // DPRINTF("reading batch of %d pages\n", count);
-
-    switch ( count )
-    {
-    case 0:
-        // DPRINTF("Last batch read\n");
-        return 0;
-
-    case XC_SAVE_ID_ENABLE_VERIFY_MODE:
-        DPRINTF("Entering page verify mode\n");
-        buf->verify = 1;
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_VCPU_INFO:
-        buf->new_ctxt_format = 1;
-        if ( RDEXACT(fd, &buf->max_vcpu_id, sizeof(buf->max_vcpu_id)) ||
-             buf->max_vcpu_id >= XC_SR_MAX_VCPUS ||
-             RDEXACT(fd, buf->vcpumap, vcpumap_sz(buf->max_vcpu_id)) ) {
-            PERROR("Error when reading max_vcpu_id");
-            return -1;
-        }
-        // DPRINTF("Max VCPU ID: %d, vcpumap: %llx\n", buf->max_vcpu_id, 
buf->vcpumap[0]);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_IDENT_PT:
-        /* Skip padding 4 bytes then read the EPT identity PT location. */
-        if ( RDEXACT(fd, &buf->identpt, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->identpt, sizeof(uint64_t)) )
-        {
-            PERROR("error read the address of the EPT identity map");
-            return -1;
-        }
-        // DPRINTF("EPT identity map address: %llx\n", buf->identpt);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_PAGING_RING_PFN:
-        /* Skip padding 4 bytes then read the paging ring location. */
-        if ( RDEXACT(fd, &buf->paging_ring_pfn, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->paging_ring_pfn, sizeof(uint64_t)) )
-        {
-            PERROR("error read the paging ring pfn");
-            return -1;
-        }
-        // DPRINTF("paging ring pfn address: %llx\n", buf->paging_ring_pfn);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_MONITOR_RING_PFN:
-        /* Skip padding 4 bytes then read the mem access ring location. */
-        if ( RDEXACT(fd, &buf->monitor_ring_pfn, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->monitor_ring_pfn, sizeof(uint64_t)) )
-        {
-            PERROR("error read the access ring pfn");
-            return -1;
-        }
-        // DPRINTF("monitor ring pfn address: %llx\n", buf->monitor_ring_pfn);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_SHARING_RING_PFN:
-        /* Skip padding 4 bytes then read the sharing ring location. */
-        if ( RDEXACT(fd, &buf->sharing_ring_pfn, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->sharing_ring_pfn, sizeof(uint64_t)) )
-        {
-            PERROR("error read the sharing ring pfn");
-            return -1;
-        }
-        // DPRINTF("sharing ring pfn address: %llx\n", buf->sharing_ring_pfn);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_VM86_TSS:
-        /* Skip padding 4 bytes then read the vm86 TSS location. */
-        if ( RDEXACT(fd, &buf->vm86_tss, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->vm86_tss, sizeof(uint64_t)) )
-        {
-            PERROR("error read the address of the vm86 TSS");
-            return -1;
-        }
-        // DPRINTF("VM86 TSS location: %llx\n", buf->vm86_tss);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_TMEM:
-        DPRINTF("xc_domain_restore start tmem\n");
-        if ( xc_tmem_restore(xch, dom, fd) ) {
-            PERROR("error reading/restoring tmem");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_TMEM_EXTRA:
-        if ( xc_tmem_restore_extra(xch, dom, fd) ) {
-            PERROR("error reading/restoring tmem extra");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_TSC_INFO:
-    {
-        uint32_t tsc_mode, khz, incarn;
-        uint64_t nsec;
-        if ( RDEXACT(fd, &tsc_mode, sizeof(uint32_t)) ||
-             RDEXACT(fd, &nsec, sizeof(uint64_t)) ||
-             RDEXACT(fd, &khz, sizeof(uint32_t)) ||
-             RDEXACT(fd, &incarn, sizeof(uint32_t)) ||
-             xc_domain_set_tsc_info(xch, dom, tsc_mode, nsec, khz, incarn) ) {
-            PERROR("error reading/restoring tsc info");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-    }
-
-    case XC_SAVE_ID_HVM_CONSOLE_PFN :
-        /* Skip padding 4 bytes then read the console pfn location. */
-        if ( RDEXACT(fd, &buf->console_pfn, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->console_pfn, sizeof(uint64_t)) )
-        {
-            PERROR("error read the address of the console pfn");
-            return -1;
-        }
-        // DPRINTF("console pfn location: %llx\n", buf->console_pfn);
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_LAST_CHECKPOINT:
-        ctx->last_checkpoint = 1;
-        // DPRINTF("last checkpoint indication received");
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_ACPI_IOPORTS_LOCATION:
-        /* Skip padding 4 bytes then read the acpi ioport location. */
-        if ( RDEXACT(fd, &buf->acpi_ioport_location, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->acpi_ioport_location, sizeof(uint64_t)) )
-        {
-            PERROR("error read the acpi ioport location");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_VIRIDIAN:
-        /* Skip padding 4 bytes then read the acpi ioport location. */
-        if ( RDEXACT(fd, &buf->viridian, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->viridian, sizeof(uint64_t)) )
-        {
-            PERROR("error reading the viridian enlightenments");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_TOOLSTACK:
-        {
-            if ( RDEXACT(fd, &buf->tdata.len, sizeof(buf->tdata.len)) )
-            {
-                PERROR("error read toolstack id size");
-                return -1;
-            }
-            buf->tdata.data = (uint8_t*) realloc(buf->tdata.data, 
buf->tdata.len);
-            if ( buf->tdata.data == NULL )
-            {
-                PERROR("error memory allocation");
-                return -1;
-            }
-            if ( RDEXACT(fd, buf->tdata.data, buf->tdata.len) )
-            {
-                PERROR("error read toolstack id");
-                return -1;
-            }
-            return pagebuf_get_one(xch, ctx, buf, fd, dom);
-        }
-
-    case XC_SAVE_ID_ENABLE_COMPRESSION:
-        /* We cannot set compression flag directly in pagebuf structure,
-         * since this pagebuf still has uncompressed pages that are yet to
-         * be applied. We enable the compression field in pagebuf structure
-         * after receiving the first tailbuf.
-         */
-        ctx->compressing = 1;
-        // DPRINTF("compression flag received");
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_COMPRESSED_DATA:
-
-        /* read the length of compressed chunk coming in */
-        if ( RDEXACT(fd, &compbuf_size, sizeof(unsigned long)) )
-        {
-            PERROR("Error when reading compbuf_size");
-            return -1;
-        }
-        if (!compbuf_size) return 1;
-
-        buf->compbuf_size += compbuf_size;
-        if (!(ptmp = realloc(buf->pages, buf->compbuf_size))) {
-            ERROR("Could not (re)allocate compression buffer");
-            return -1;
-        }
-        buf->pages = ptmp;
-
-        if ( RDEXACT(fd, buf->pages + (buf->compbuf_size - compbuf_size),
-                     compbuf_size) ) {
-            PERROR("Error when reading compression buffer");
-            return -1;
-        }
-        return compbuf_size;
-
-    case XC_SAVE_ID_HVM_GENERATION_ID_ADDR:
-        /* Skip padding 4 bytes then read the generation id buffer location. */
-        if ( RDEXACT(fd, &buf->vm_generationid_addr, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->vm_generationid_addr, sizeof(uint64_t)) )
-        {
-            PERROR("error read the generation id buffer location");
-            return -1;
-        }
-        DPRINTF("read generation id buffer address");
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_IOREQ_SERVER_PFN:
-        /* Skip padding 4 bytes then read the ioreq server gmfn base. */
-        if ( RDEXACT(fd, &buf->ioreq_server_pfn, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->ioreq_server_pfn, sizeof(uint64_t)) )
-        {
-            PERROR("error read the ioreq server gmfn base");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    case XC_SAVE_ID_HVM_NR_IOREQ_SERVER_PAGES:
-        /* Skip padding 4 bytes then read the ioreq server gmfn count. */
-        if ( RDEXACT(fd, &buf->nr_ioreq_server_pages, sizeof(uint32_t)) ||
-             RDEXACT(fd, &buf->nr_ioreq_server_pages, sizeof(uint64_t)) )
-        {
-            PERROR("error read the ioreq server gmfn count");
-            return -1;
-        }
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    default:
-        if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
-            ERROR("Max batch size exceeded (%d). Giving up.", count);
-            errno = EMSGSIZE;
-            return -1;
-        }
-        break;
-    }
-
-    oldcount = buf->nr_pages;
-    buf->nr_pages += count;
-    if (!buf->pfn_types) {
-        if (!(buf->pfn_types = malloc(buf->nr_pages * 
sizeof(*(buf->pfn_types))))) {
-            ERROR("Could not allocate PFN type buffer");
-            return -1;
-        }
-    } else {
-        if (!(ptmp = realloc(buf->pfn_types, buf->nr_pages * 
sizeof(*(buf->pfn_types))))) {
-            ERROR("Could not reallocate PFN type buffer");
-            return -1;
-        }
-        buf->pfn_types = ptmp;
-    }
-    if ( RDEXACT(fd, buf->pfn_types + oldcount, count * 
sizeof(*(buf->pfn_types)))) {
-        PERROR("Error when reading region pfn types");
-        return -1;
-    }
-
-    countpages = count;
-    for (i = oldcount; i < buf->nr_pages; ++i)
-    {
-        unsigned long pagetype;
-
-        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
-        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
-             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
-             pagetype == XEN_DOMCTL_PFINFO_XALLOC )
-            --countpages;
-    }
-
-    if (!countpages)
-        return count;
-
-    /* If Remus Checkpoint Compression is turned on, we will only be
-     * receiving the pfn lists now. The compressed pages will come in later,
-     * following a <XC_SAVE_ID_COMPRESSED_DATA, compressedChunkSize> tuple.
-     */
-    if (buf->compressing)
-        return pagebuf_get_one(xch, ctx, buf, fd, dom);
-
-    oldcount = buf->nr_physpages;
-    buf->nr_physpages += countpages;
-    if (!buf->pages) {
-        if (!(buf->pages = malloc(buf->nr_physpages * PAGE_SIZE))) {
-            ERROR("Could not allocate page buffer");
-            return -1;
-        }
-    } else {
-        if (!(ptmp = realloc(buf->pages, buf->nr_physpages * PAGE_SIZE))) {
-            ERROR("Could not reallocate page buffer");
-            return -1;
-        }
-        buf->pages = ptmp;
-    }
-    if ( RDEXACT(fd, buf->pages + oldcount * PAGE_SIZE, countpages * 
PAGE_SIZE) ) {
-        PERROR("Error when reading pages");
-        return -1;
-    }
-
-    return count;
-}
-
-static int pagebuf_get(xc_interface *xch, struct restore_ctx *ctx,
-                       pagebuf_t* buf, int fd, uint32_t dom)
-{
-    int rc;
-
-    buf->nr_physpages = buf->nr_pages = 0;
-    buf->compbuf_pos = buf->compbuf_size = 0;
-
-    do {
-        rc = pagebuf_get_one(xch, ctx, buf, fd, dom);
-    } while (rc > 0);
-
-    if (rc < 0)
-        pagebuf_free(buf);
-
-    return rc;
-}
-
-static int apply_batch(xc_interface *xch, uint32_t dom, struct restore_ctx 
*ctx,
-                       xen_pfn_t* region_mfn, unsigned long* pfn_type, int 
pae_extended_cr3,
-                       struct xc_mmu* mmu,
-                       pagebuf_t* pagebuf, int curbatch, int *invalid_pages)
-{
-    int i, j, curpage, nr_mfns;
-    int k, scount;
-    unsigned long superpage_start=INVALID_P2M_ENTRY;
-    /* used by debug verify code */
-    unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
-    /* Our mapping of the current region (batch) */
-    char *region_base;
-    /* A temporary mapping, and a copy, of one frame of guest memory. */
-    unsigned long *page = NULL;
-    int nraces = 0;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-    int* pfn_err = NULL;
-    int rc = -1;
-    int local_invalid_pages = 0;
-    /* We have handled curbatch pages before this batch, and there are
-     * *invalid_pages pages that are not in pagebuf->pages. So the first
-     * page for this page is (curbatch - *invalid_pages) page.
-     */
-    int first_page = curbatch - *invalid_pages;
-
-    unsigned long mfn, pfn, pagetype;
-
-    j = pagebuf->nr_pages - curbatch;
-    if (j > MAX_BATCH_SIZE)
-        j = MAX_BATCH_SIZE;
-
-    /* First pass for this batch: work out how much memory to alloc, and 
detect superpages */
-    nr_mfns = scount = 0;
-    for ( i = 0; i < j; i++ )
-    {
-        unsigned long pfn, pagetype;
-        pfn      = pagebuf->pfn_types[i + curbatch] & 
~XEN_DOMCTL_PFINFO_LTAB_MASK;
-        pagetype = pagebuf->pfn_types[i + curbatch] &  
XEN_DOMCTL_PFINFO_LTAB_MASK;
-
-        /* For allocation purposes, treat XEN_DOMCTL_PFINFO_XALLOC as a normal 
page */
-        if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) && 
-             (ctx->p2m[pfn] == INVALID_P2M_ENTRY) )
-        {
-            /* Have a live PFN which hasn't had an MFN allocated */
-
-            /* Logic if we're in the middle of detecting a candidate superpage 
*/
-            if ( superpage_start != INVALID_P2M_ENTRY )
-            {
-                /* Is this the next expected continuation? */
-                if ( pfn == superpage_start + scount )
-                {
-                    if ( !ctx->superpages )
-                    {
-                        ERROR("Unexpexted codepath with no superpages");
-                        return -1;
-                    }
-
-                    scount++;
-
-                    /* If we've found a whole superpage, allocate it and 
update p2m */
-                    if ( scount  == SUPERPAGE_NR_PFNS )
-                    {
-                        unsigned long supermfn;
-
-
-                        supermfn=superpage_start;
-                        if ( xc_domain_populate_physmap_exact(xch, dom, 1,
-                                         SUPERPAGE_PFN_SHIFT, 0, &supermfn) != 
0 )
-                        {
-                            DPRINTF("No 2M page available for pfn 0x%lx, fall 
back to 4K page.\n",
-                                    superpage_start);
-                            /* If we're falling back from a failed allocation, 
subtract one
-                             * from count, since the last page == pfn, which 
will behandled
-                             * anyway. */
-                            scount--;
-                            goto fallback;
-                        }
-
-                        DPRINTF("Mapping superpage (%d) pfn %lx, mfn %lx\n", 
scount, superpage_start, supermfn);
-                        for (k=0; k<scount; k++)
-                        {
-                            /* We just allocated a new mfn above; update p2m */
-                            ctx->p2m[superpage_start+k] = supermfn+k;
-                            ctx->nr_pfns++;
-                            /* region_map[] will be set below */
-                        }
-                        superpage_start=INVALID_P2M_ENTRY;
-                        scount=0;
-                    }
-                    continue;
-                }
-                
-            fallback:
-                DPRINTF("Falling back %d pages pfn %lx\n", scount, 
superpage_start);
-                for (k=0; k<scount; k++)
-                {
-                    ctx->p2m_batch[nr_mfns++] = superpage_start+k; 
-                    ctx->p2m[superpage_start+k]--;
-                }
-                superpage_start = INVALID_P2M_ENTRY;
-                scount=0;
-            }
-
-            /* Are we ready to start a new superpage candidate? */
-            if ( ctx->hvm && ctx->superpages && SUPER_PAGE_START(pfn) )
-            {
-                superpage_start=pfn;
-                scount++;
-            }
-            else
-            {
-                /* Add the current pfn to pfn_batch */
-                ctx->p2m_batch[nr_mfns++] = pfn;
-                ctx->p2m[pfn]--;
-            }
-        }
-    }
-
-    /* Clean up any partial superpage candidates */
-    if ( superpage_start != INVALID_P2M_ENTRY )
-    {
-        DPRINTF("Falling back %d pages pfn %lx\n", scount, superpage_start);
-        for (k=0; k<scount; k++)
-        {
-            ctx->p2m_batch[nr_mfns++] = superpage_start+k; 
-            ctx->p2m[superpage_start+k]--;
-        }
-        superpage_start = INVALID_P2M_ENTRY;
-    }
-
-    /* Now allocate a bunch of mfns for this batch */
-    if ( nr_mfns )
-    {
-        DPRINTF("Mapping order 0,  %d; first pfn %lx\n", nr_mfns, 
ctx->p2m_batch[0]);
-    
-        if (!ctx->hvm && ctx->superpages)
-            rc = alloc_superpage_mfns(xch, dom, ctx, nr_mfns);
-        else
-            rc = xc_domain_populate_physmap_exact(xch, dom, nr_mfns, 0, 0,
-                                                  ctx->p2m_batch);
-
-        if (rc)
-        {
-            ERROR("Failed to allocate memory for batch.!\n"); 
-            errno = ENOMEM;
-            return -1;
-        }
-    }
-
-    /* Second pass for this batch: update p2m[] and region_mfn[] */
-    nr_mfns = 0; 
-    for ( i = 0; i < j; i++ )
-    {
-        unsigned long pfn, pagetype;
-        pfn      = pagebuf->pfn_types[i + curbatch] & 
~XEN_DOMCTL_PFINFO_LTAB_MASK;
-        pagetype = pagebuf->pfn_types[i + curbatch] &  
XEN_DOMCTL_PFINFO_LTAB_MASK;
-
-        if ( pagetype != XEN_DOMCTL_PFINFO_XTAB
-             && ctx->p2m[pfn] == (INVALID_P2M_ENTRY-1) )
-        {
-            /* We just allocated a new mfn above; update p2m */
-            ctx->p2m[pfn] = ctx->p2m_batch[nr_mfns++]; 
-            ctx->nr_pfns++; 
-        }
-
-        /* setup region_mfn[] for batch map, if necessary.
-         * For HVM guests, this interface takes PFNs, not MFNs */
-        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
-             || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
-            region_mfn[i] = ~0UL; /* map will fail but we don't care */
-        else
-            region_mfn[i] = ctx->hvm ? pfn : ctx->p2m[pfn];
-    }
-
-    /* Map relevant mfns */
-    pfn_err = calloc(j, sizeof(*pfn_err));
-    if ( pfn_err == NULL )
-    {
-        PERROR("allocation for pfn_err failed");
-        return -1;
-    }
-    region_base = xc_map_foreign_bulk(
-        xch, dom, PROT_WRITE, region_mfn, pfn_err, j);
-
-    if ( region_base == NULL )
-    {
-        PERROR("map batch failed");
-        free(pfn_err);
-        return -1;
-    }
-
-    for ( i = 0, curpage = -1; i < j; i++ )
-    {
-        pfn      = pagebuf->pfn_types[i + curbatch] & 
~XEN_DOMCTL_PFINFO_LTAB_MASK;
-        pagetype = pagebuf->pfn_types[i + curbatch] &  
XEN_DOMCTL_PFINFO_LTAB_MASK;
-
-        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
-             || pagetype == XEN_DOMCTL_PFINFO_XALLOC)
-        {
-            local_invalid_pages++;
-            /* a bogus/unmapped/allocate-only page: skip it */
-            continue;
-        }
-
-        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
-        {
-            if ( xc_set_broken_page_p2m(xch, dom, pfn) )
-            {
-                ERROR("Set p2m for broken page failed, "
-                      "dom=%d, pfn=%lx\n", dom, pfn);
-                goto err_mapped;
-            }
-
-            local_invalid_pages++;
-            continue;
-        }
-
-        if (pfn_err[i])
-        {
-            ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn 
%lx",
-                  pfn, region_mfn[i], ctx->p2m[pfn]);
-            goto err_mapped;
-        }
-
-        ++curpage;
-
-        if ( pfn > dinfo->p2m_size )
-        {
-            ERROR("pfn out of range");
-            goto err_mapped;
-        }
-
-        pfn_type[pfn] = pagetype;
-
-        mfn = ctx->p2m[pfn];
-
-        /* In verify mode, we use a copy; otherwise we work in place */
-        page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
-
-        /* Remus - page decompression */
-        if (pagebuf->compressing)
-        {
-            if (xc_compression_uncompress_page(xch, pagebuf->pages,
-                                               pagebuf->compbuf_size,
-                                               &pagebuf->compbuf_pos,
-                                               (char *)page))
-            {
-                ERROR("Failed to uncompress page (pfn=%lx)\n", pfn);
-                goto err_mapped;
-            }
-        }
-        else
-            memcpy(page, pagebuf->pages + (first_page + curpage) * PAGE_SIZE,
-                   PAGE_SIZE);
-
-        pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
-
-        if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
-             (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
-        {
-            /*
-            ** A page table page - need to 'uncanonicalize' it, i.e.
-            ** replace all the references to pfns with the corresponding
-            ** mfns for the new domain.
-            **
-            ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
-            ** so we may need to update the p2m after the main loop.
-            ** Hence we defer canonicalization of L1s until then.
-            */
-            if ((ctx->pt_levels != 3) ||
-                pae_extended_cr3 ||
-                (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) {
-
-                if (!uncanonicalize_pagetable(xch, dom, ctx, page)) {
-                    /*
-                    ** Failing to uncanonicalize a page table can be ok
-                    ** under live migration since the pages type may have
-                    ** changed by now (and we'll get an update later).
-                    */
-                    DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
-                            pagetype >> 28, pfn, mfn);
-                    nraces++;
-                    continue;
-                }
-            }
-        }
-        else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
-        {
-            ERROR("Bogus page type %lx page table is out of range: "
-                  "i=%d p2m_size=%lu", pagetype, i, dinfo->p2m_size);
-            goto err_mapped;
-        }
-
-        if ( pagebuf->verify )
-        {
-            int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
-            if ( res )
-            {
-                int v;
-
-                DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
-                        "actualcs=%08lx\n", pfn, pfn_type[pfn],
-                        csum_page(region_base + i * PAGE_SIZE),
-                        csum_page(buf));
-
-                for ( v = 0; v < 4; v++ )
-                {
-                    unsigned long *p = (unsigned long *)
-                        (region_base + i*PAGE_SIZE);
-                    if ( buf[v] != p[v] )
-                        DPRINTF("    %d: %08lx %08lx\n", v, buf[v], p[v]);
-                }
-            }
-        }
-
-        if ( !ctx->hvm &&
-             xc_add_mmu_update(xch, mmu,
-                               (((unsigned long long)mfn) << PAGE_SHIFT)
-                               | MMU_MACHPHYS_UPDATE, pfn) )
-        {
-            PERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn);
-            goto err_mapped;
-        }
-    } /* end of 'batch' for loop */
-
-    rc = nraces;
-    *invalid_pages += local_invalid_pages;
-
-  err_mapped:
-    munmap(region_base, j*PAGE_SIZE);
-    free(pfn_err);
-
-    return rc;
-}
-
-int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
-                      unsigned int store_evtchn, unsigned long *store_mfn,
-                      domid_t store_domid, unsigned int console_evtchn,
-                      unsigned long *console_mfn, domid_t console_domid,
-                      unsigned int hvm, unsigned int pae, int superpages,
-                      int checkpointed_stream,
-                      struct restore_callbacks *callbacks)
-{
-    DECLARE_DOMCTL;
-    xc_dominfo_t info;
-    int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0;
-    uint32_t vcpuextstate_size = 0;
-    unsigned long mfn, pfn;
-    int nraces = 0;
-
-    /* The new domain's shared-info frame number. */
-    unsigned long shared_info_frame;
-    unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
-    shared_info_any_t *old_shared_info = 
-        (shared_info_any_t *)shared_info_page;
-    shared_info_any_t *new_shared_info;
-
-    /* A copy of the CPU context of the guest. */
-    DECLARE_HYPERCALL_BUFFER(vcpu_guest_context_any_t, ctxt);
-
-    /* A copy of the CPU eXtended States of the guest. */
-    DECLARE_HYPERCALL_BUFFER(void, buffer);
-
-    /* A table containing the type of each PFN (/not/ MFN!). */
-    unsigned long *pfn_type = NULL;
-
-    /* A table of MFNs to map in the current region */
-    xen_pfn_t *region_mfn = NULL;
-
-    /* A copy of the pfn-to-mfn table frame list. */
-    xen_pfn_t *p2m_frame_list = NULL;
-    
-    /* A temporary mapping of the guest's start_info page. */
-    start_info_any_t *start_info;
-
-    /* Our mapping of the current region (batch) */
-    char *region_base;
-
-    struct xc_mmu *mmu = NULL;
-
-    struct mmuext_op pin[MAX_PIN_BATCH];
-    unsigned int nr_pins;
-
-    uint64_t vcpumap[XC_SR_MAX_VCPUS/64] = { 1ULL };
-    unsigned int max_vcpu_id = 0;
-    int new_ctxt_format = 0;
-
-    pagebuf_t pagebuf;
-    tailbuf_t tailbuf, tmptail;
-    struct toolstack_data_t tdata, tdatatmp;
-    void* vcpup;
-    uint64_t console_pfn = 0;
-
-    int orig_io_fd_flags;
-
-    struct restore_ctx _ctx;
-    struct restore_ctx *ctx = &_ctx;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    if ( getenv("XG_MIGRATION_V2") )
-    {
-        return xc_domain_restore2(
-            xch, io_fd, dom, store_evtchn, store_mfn,
-            store_domid, console_evtchn, console_mfn, console_domid,
-            hvm,  pae,  superpages, checkpointed_stream, callbacks);
-    }
-
-    DPRINTF("%s: starting restore of new domid %u", __func__, dom);
-
-    pagebuf_init(&pagebuf);
-    memset(&tailbuf, 0, sizeof(tailbuf));
-    tailbuf.ishvm = hvm;
-    memset(&tdata, 0, sizeof(tdata));
-
-    memset(ctx, 0, sizeof(*ctx));
-
-    ctx->superpages = superpages;
-    ctx->hvm = hvm;
-    ctx->last_checkpoint = !checkpointed_stream;
-
-    ctxt = xc_hypercall_buffer_alloc(xch, ctxt, sizeof(*ctxt));
-
-    if ( ctxt == NULL )
-    {
-        PERROR("Unable to allocate VCPU ctxt buffer");
-        return 1;
-    }
-
-
-    if ( (orig_io_fd_flags = fcntl(io_fd, F_GETFL, 0)) < 0 ) {
-        PERROR("unable to read IO FD flags");
-        goto out;
-    }
-
-    if ( RDEXACT(io_fd, &dinfo->p2m_size, sizeof(unsigned long)) )
-    {
-        PERROR("read: p2m_size");
-        goto out;
-    }
-    DPRINTF("%s: p2m_size = %lx\n", __func__, dinfo->p2m_size);
-
-    if ( !get_platform_info(xch, dom,
-                            &ctx->max_mfn, &ctx->hvirt_start, &ctx->pt_levels, 
&dinfo->guest_width) )
-    {
-        ERROR("Unable to get platform info.");
-        return 1;
-    }
-    
-    /* The *current* word size of the guest isn't very interesting; for now
-     * assume the guest will be the same as we are.  We'll fix that later
-     * if we discover otherwise. */
-    dinfo->guest_width = sizeof(unsigned long);
-    ctx->pt_levels = (dinfo->guest_width == 8) ? 4 : 3;
-    
-    if ( !hvm ) 
-    {
-        /* Load the p2m frame list, plus potential extended info chunk */
-        p2m_frame_list = load_p2m_frame_list(xch, ctx,
-            io_fd, &pae_extended_cr3, &ext_vcpucontext,
-            &vcpuextstate_size);
-
-        if ( !p2m_frame_list )
-            goto out;
-
-        /* Now that we know the word size, tell Xen about it */
-        memset(&domctl, 0, sizeof(domctl));
-        domctl.domain = dom;
-        domctl.cmd    = XEN_DOMCTL_set_address_size;
-        domctl.u.address_size.size = dinfo->guest_width * 8;
-        frc = do_domctl(xch, &domctl);
-        if ( frc != 0 )
-        {
-            PERROR("Unable to set guest address size.");
-            goto out;
-        }
-    }
-
-    /* We want zeroed memory so use calloc rather than malloc. */
-    ctx->p2m   = calloc(dinfo->p2m_size, sizeof(xen_pfn_t));
-    pfn_type   = calloc(dinfo->p2m_size, sizeof(unsigned long));
-
-    region_mfn = malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), 
PAGE_SHIFT));
-    ctx->p2m_batch = malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), 
PAGE_SHIFT));
-    if (!ctx->hvm && ctx->superpages)
-    {
-        ctx->p2m_saved_batch =
-            malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
-        if ( ctx->p2m_saved_batch == NULL )
-        {
-            ERROR("saved batch memory alloc failed");
-            errno = ENOMEM;
-            goto out;
-        }
-    }
-
-    if ( (ctx->p2m == NULL) || (pfn_type == NULL) ||
-         (region_mfn == NULL) || (ctx->p2m_batch == NULL) )
-    {
-        ERROR("memory alloc failed");
-        errno = ENOMEM;
-        goto out;
-    }
-
-    memset(region_mfn, 0,
-           ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT)); 
-    memset(ctx->p2m_batch, 0,
-           ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT)); 
-
-    /* Get the domain's shared-info frame. */
-    if ( xc_domain_getinfo(xch, (domid_t)dom, 1, &info) != 1 )
-    {
-        PERROR("Could not get information on new domain");
-        goto out;
-    }
-    shared_info_frame = info.shared_info_frame;
-
-    /* Mark all PFNs as invalid; we allocate on demand */
-    for ( pfn = 0; pfn < dinfo->p2m_size; pfn++ )
-        ctx->p2m[pfn] = INVALID_P2M_ENTRY;
-
-    mmu = xc_alloc_mmu_updates(xch, dom);
-    if ( mmu == NULL )
-    {
-        PERROR("Could not initialise for MMU updates");
-        goto out;
-    }
-
-    xc_set_progress_prefix(xch, "Reloading memory pages");
-    xc_report_progress_step(xch, 0, dinfo->p2m_size);
-
-    /*
-     * Now simply read each saved frame into its new machine frame.
-     * We uncanonicalise page tables as we go.
-     */
-
-    n = m = 0;
- loadpages:
-    for ( ; ; )
-    {
-        int j, curbatch, invalid_pages;
-
-        xc_report_progress_step(xch, n, dinfo->p2m_size);
-
-        if ( !ctx->completed ) {
-            pagebuf.nr_physpages = pagebuf.nr_pages = 0;
-            pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
-            if ( pagebuf_get_one(xch, ctx, &pagebuf, io_fd, dom) < 0 ) {
-                PERROR("Error when reading batch");
-                goto out;
-            }
-        }
-        j = pagebuf.nr_pages;
-
-        DBGPRINTF("batch %d\n",j);
-
-        if ( j == 0 ) {
-            /* catch vcpu updates */
-            if (pagebuf.new_ctxt_format) {
-                max_vcpu_id = pagebuf.max_vcpu_id;
-                memcpy(vcpumap, pagebuf.vcpumap, vcpumap_sz(max_vcpu_id));
-            }
-            /* should this be deferred? does it change? */
-            if ( pagebuf.identpt )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_IDENT_PT, 
pagebuf.identpt);
-            if ( pagebuf.paging_ring_pfn )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_PAGING_RING_PFN, 
pagebuf.paging_ring_pfn);
-            if ( pagebuf.monitor_ring_pfn )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_MONITOR_RING_PFN, 
pagebuf.monitor_ring_pfn);
-            if ( pagebuf.sharing_ring_pfn )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_SHARING_RING_PFN, 
pagebuf.sharing_ring_pfn);
-            if ( pagebuf.vm86_tss )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_VM86_TSS, 
pagebuf.vm86_tss);
-            if ( pagebuf.console_pfn )
-                console_pfn = pagebuf.console_pfn;
-            if ( pagebuf.vm_generationid_addr )
-                xc_hvm_param_set(xch, dom, HVM_PARAM_VM_GENERATION_ID_ADDR,
-                                 pagebuf.vm_generationid_addr);
-
-            break;  /* our work here is done */
-        }
-
-        /* break pagebuf into batches */
-        curbatch = 0;
-        invalid_pages = 0;
-        while ( curbatch < j ) {
-            int brc;
-
-            brc = apply_batch(xch, dom, ctx, region_mfn, pfn_type,
-                              pae_extended_cr3, mmu, &pagebuf, curbatch,
-                              &invalid_pages);
-            if ( brc < 0 )
-                goto out;
-
-            nraces += brc;
-
-            curbatch += MAX_BATCH_SIZE;
-        }
-
-        pagebuf.nr_physpages = pagebuf.nr_pages = 0;
-        pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
-
-        n += j; /* crude stats */
-
-        /* 
-         * Discard cache for portion of file read so far up to last
-         *  page boundary every 16MB or so.
-         */
-        m += j;
-        if ( m > MAX_PAGECACHE_USAGE )
-        {
-            discard_file_cache(xch, io_fd, 0 /* no flush */);
-            m = 0;
-        }
-    }
-
-    /*
-     * Ensure we flush all machphys updates before potential PAE-specific
-     * reallocations below.
-     */
-    if ( !hvm && xc_flush_mmu_updates(xch, mmu) )
-    {
-        PERROR("Error doing flush_mmu_updates()");
-        goto out;
-    }
-
-    // DPRINTF("Received all pages (%d races)\n", nraces);
-
-    if ( !ctx->completed ) {
-
-        if ( buffer_tail(xch, ctx, &tailbuf, io_fd, max_vcpu_id, vcpumap,
-                         ext_vcpucontext, vcpuextstate_size) < 0 ) {
-            ERROR ("error buffering image tail");
-            goto out;
-        }
-
-        ctx->completed = 1;
-
-        /*
-         * If more checkpoints are expected then shift into
-         * nonblocking mode for the remainder.
-         */
-        if ( !ctx->last_checkpoint )
-            fcntl(io_fd, F_SETFL, orig_io_fd_flags | O_NONBLOCK);
-
-        /*
-         * If sender had sent enable compression flag, switch to compressed
-         * checkpoints mode once the first checkpoint is received.
-         */
-        if (ctx->compressing)
-            pagebuf.compressing = 1;
-    }
-
-    if (pagebuf.viridian != 0)
-        xc_hvm_param_set(xch, dom, HVM_PARAM_VIRIDIAN, pagebuf.viridian);
-
-    /*
-     * If we are migrating in from a host that does not support
-     * secondary emulators then nr_ioreq_server_pages will be 0, since
-     * there will be no XC_SAVE_ID_HVM_NR_IOREQ_SERVER_PAGES chunk in
-     * the image.
-     * If we are migrating from a host that does support secondary
-     * emulators then the XC_SAVE_ID_HVM_NR_IOREQ_SERVER_PAGES chunk
-     * will exist and is guaranteed to have a non-zero value. The
-     * existence of that chunk also implies the existence of the
-     * XC_SAVE_ID_HVM_IOREQ_SERVER_PFN chunk, which is also guaranteed
-     * to have a non-zero value.
-     */
-    if (!pagebuf.nr_ioreq_server_pages ^ !pagebuf.ioreq_server_pfn) {
-        ERROR("Inconsistent IOREQ Server settings (nr=%"PRIx64", 
pfn=%"PRIx64")",
-              pagebuf.nr_ioreq_server_pages, pagebuf.ioreq_server_pfn);
-    } else {
-        if (pagebuf.nr_ioreq_server_pages != 0 &&
-            pagebuf.ioreq_server_pfn != 0) {
-            xc_hvm_param_set(xch, dom, HVM_PARAM_NR_IOREQ_SERVER_PAGES,
-                             pagebuf.nr_ioreq_server_pages);
-            xc_hvm_param_set(xch, dom, HVM_PARAM_IOREQ_SERVER_PFN,
-                             pagebuf.ioreq_server_pfn);
-        }
-    }
-
-    if (pagebuf.acpi_ioport_location == 1) {
-        DBGPRINTF("Use new firmware ioport from the checkpoint\n");
-        xc_hvm_param_set(xch, dom, HVM_PARAM_ACPI_IOPORTS_LOCATION, 1);
-    } else if (pagebuf.acpi_ioport_location == 0) {
-        DBGPRINTF("Use old firmware ioport from the checkpoint\n");
-    } else {
-        ERROR("Error, unknow acpi ioport location (%"PRId64")", 
pagebuf.acpi_ioport_location);
-    }
-
-    tdatatmp = tdata;
-    tdata = pagebuf.tdata;
-    pagebuf.tdata = tdatatmp;
-
-    if ( ctx->last_checkpoint )
-    {
-        // DPRINTF("Last checkpoint, finishing\n");
-        goto finish;
-    }
-
-    // DPRINTF("Buffered checkpoint\n");
-
-    if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
-        PERROR("error when buffering batch, finishing");
-        /*
-         * Remus: discard the current incomplete checkpoint and restore
-         * backup from the last complete checkpoint.
-         */
-        goto finish;
-    }
-    memset(&tmptail, 0, sizeof(tmptail));
-    tmptail.ishvm = hvm;
-    if ( buffer_tail(xch, ctx, &tmptail, io_fd, max_vcpu_id, vcpumap,
-                     ext_vcpucontext, vcpuextstate_size) < 0 ) {
-        ERROR ("error buffering image tail, finishing");
-        /*
-         * Remus: discard the current incomplete checkpoint and restore
-         * backup from the last complete checkpoint.
-         */
-        goto finish;
-    }
-    tailbuf_free(&tailbuf);
-    memcpy(&tailbuf, &tmptail, sizeof(tailbuf));
-
-    goto loadpages;
-
-  /* With Remus: restore from last complete checkpoint */
-  finish:
-    if ( hvm )
-        goto finish_hvm;
-
-    if ( (ctx->pt_levels == 3) && !pae_extended_cr3 )
-    {
-        /*
-        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
-        ** is a little awkward and involves (a) finding all such PGDs and
-        ** replacing them with 'lowmem' versions; (b) upating the p2m[]
-        ** with the new info; and (c) canonicalizing all the L1s using the
-        ** (potentially updated) p2m[].
-        **
-        ** This is relatively slow (and currently involves two passes through
-        ** the pfn_type[] array), but at least seems to be correct. May wish
-        ** to consider more complex approaches to optimize this later.
-        */
-
-        int j, k;
-        
-        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
-        for ( i = 0; i < dinfo->p2m_size; i++ )
-        {
-            if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
-                  XEN_DOMCTL_PFINFO_L3TAB) &&
-                 (ctx->p2m[i] > 0xfffffUL) )
-            {
-                unsigned long new_mfn;
-                uint64_t l3ptes[4];
-                uint64_t *l3tab;
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xch, dom, PAGE_SIZE,
-                                         PROT_READ, ctx->p2m[i]);
-                if ( l3tab == NULL )
-                {
-                    PERROR("xc_map_foreign_range failed (for l3tab)");
-                    goto out;
-                }
-
-                for ( j = 0; j < 4; j++ )
-                    l3ptes[j] = l3tab[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-                new_mfn = xc_make_page_below_4G(xch, dom, ctx->p2m[i]);
-                if ( !new_mfn )
-                {
-                    PERROR("Couldn't get a page below 4GB :-(");
-                    goto out;
-                }
-
-                ctx->p2m[i] = new_mfn;
-                if ( xc_add_mmu_update(xch, mmu,
-                                       (((unsigned long long)new_mfn)
-                                        << PAGE_SHIFT) |
-                                       MMU_MACHPHYS_UPDATE, i) )
-                {
-                    PERROR("Couldn't m2p on PAE root pgdir");
-                    goto out;
-                }
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xch, dom, PAGE_SIZE,
-                                         PROT_READ | PROT_WRITE, ctx->p2m[i]);
-                if ( l3tab == NULL )
-                {
-                    PERROR("xc_map_foreign_range failed (for l3tab, 2nd)");
-                    goto out;
-                }
-
-                for ( j = 0; j < 4; j++ )
-                    l3tab[j] = l3ptes[j];
-
-                munmap(l3tab, PAGE_SIZE);
-            }
-        }
-
-        /* Second pass: find all L1TABs and uncanonicalize them */
-        j = 0;
-
-        for ( i = 0; i < dinfo->p2m_size; i++ )
-        {
-            if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
-                  XEN_DOMCTL_PFINFO_L1TAB) )
-            {
-                region_mfn[j] = ctx->p2m[i];
-                j++;
-            }
-
-            if ( (i == (dinfo->p2m_size-1)) || (j == MAX_BATCH_SIZE) )
-            {
-                region_base = xc_map_foreign_pages(
-                    xch, dom, PROT_READ | PROT_WRITE, region_mfn, j);
-                if ( region_base == NULL )
-                {
-                    PERROR("map batch failed");
-                    goto out;
-                }
-
-                for ( k = 0; k < j; k++ )
-                {
-                    if ( !uncanonicalize_pagetable(
-                        xch, dom, ctx,
-                        region_base + k*PAGE_SIZE) )
-                    {
-                        ERROR("failed uncanonicalize pt!");
-                        goto out;
-                    }
-                }
-
-                munmap(region_base, j*PAGE_SIZE);
-                j = 0;
-            }
-        }
-
-        if ( xc_flush_mmu_updates(xch, mmu) )
-        {
-            PERROR("Error doing xc_flush_mmu_updates()");
-            goto out;
-        }
-    }
-
-    /*
-     * Pin page tables. Do this after writing to them as otherwise Xen
-     * will barf when doing the type-checking.
-     */
-    nr_pins = 0;
-    for ( i = 0; i < dinfo->p2m_size; i++ )
-    {
-        if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
-            continue;
-
-        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
-        {
-        case XEN_DOMCTL_PFINFO_L1TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
-            break;
-
-        case XEN_DOMCTL_PFINFO_L2TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
-            break;
-
-        case XEN_DOMCTL_PFINFO_L3TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
-            break;
-
-        case XEN_DOMCTL_PFINFO_L4TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
-            break;
-
-        default:
-            continue;
-        }
-
-        pin[nr_pins].arg1.mfn = ctx->p2m[i];
-        nr_pins++;
-
-        /* Batch full? Then flush. */
-        if ( nr_pins == MAX_PIN_BATCH )
-        {
-            if ( xc_mmuext_op(xch, pin, nr_pins, dom) < 0 )
-            {
-                PERROR("Failed to pin batch of %d page tables", nr_pins);
-                goto out;
-            }
-            nr_pins = 0;
-        }
-    }
-
-    /* Flush final partial batch. */
-    if ( (nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0) )
-    {
-        PERROR("Failed to pin batch of %d page tables", nr_pins);
-        goto out;
-    }
-
-    DPRINTF("Memory reloaded (%ld pages)\n", ctx->nr_pfns);
-
-    /* Get the list of PFNs that are not in the psuedo-phys map */
-    {
-        int nr_frees = 0;
-
-        for ( i = 0; i < tailbuf.u.pv.pfncount; i++ )
-        {
-            unsigned long pfn = tailbuf.u.pv.pfntab[i];
-
-            if ( ctx->p2m[pfn] != INVALID_P2M_ENTRY )
-            {
-                /* pfn is not in physmap now, but was at some point during
-                   the save/migration process - need to free it */
-                tailbuf.u.pv.pfntab[nr_frees++] = ctx->p2m[pfn];
-                ctx->p2m[pfn]  = INVALID_P2M_ENTRY; /* not in pseudo-physical 
map */
-            }
-        }
-
-        if ( nr_frees > 0 )
-        {
-            if ( (frc = xc_domain_decrease_reservation(xch, dom, nr_frees, 0, 
tailbuf.u.pv.pfntab)) != nr_frees )
-            {
-                PERROR("Could not decrease reservation : %d", frc);
-                goto out;
-            }
-            else
-                DPRINTF("Decreased reservation by %d pages\n", 
tailbuf.u.pv.pfncount);
-        }
-    }
-
-    vcpup = tailbuf.u.pv.vcpubuf;
-    for ( i = 0; i <= max_vcpu_id; i++ )
-    {
-        if ( !(vcpumap[i/64] & (1ULL << (i%64))) )
-            continue;
-
-        memcpy(ctxt, vcpup, ((dinfo->guest_width == 8) ? sizeof(ctxt->x64)
-                              : sizeof(ctxt->x32)));
-        vcpup += (dinfo->guest_width == 8) ? sizeof(ctxt->x64) : 
sizeof(ctxt->x32);
-
-        DPRINTF("read VCPU %d\n", i);
-
-        if ( !new_ctxt_format )
-            SET_FIELD(ctxt, flags,
-                      GET_FIELD(ctxt, flags, dinfo->guest_width) | VGCF_online,
-                      dinfo->guest_width);
-
-        if ( i == 0 )
-        {
-            /*
-             * Uncanonicalise the start info frame number and poke in
-             * updated values into the start info itself.
-             *
-             * The start info MFN is the 3rd argument to the
-             * HYPERVISOR_sched_op hypercall when op==SCHEDOP_shutdown
-             * and reason==SHUTDOWN_suspend, it is canonicalised in
-             * xc_domain_save and therefore the PFN is found in the
-             * edx register.
-             */
-            pfn = GET_FIELD(ctxt, user_regs.edx, dinfo->guest_width);
-            if ( (pfn >= dinfo->p2m_size) ||
-                 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
-            {
-                ERROR("Suspend record frame number is bad");
-                goto out;
-            }
-            mfn = ctx->p2m[pfn];
-            SET_FIELD(ctxt, user_regs.edx, mfn, dinfo->guest_width);
-            start_info = xc_map_foreign_range(
-                xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
-            if ( start_info == NULL )
-            {
-                PERROR("xc_map_foreign_range failed (for start_info)");
-                goto out;
-            }
-
-            SET_FIELD(start_info, nr_pages, dinfo->p2m_size, 
dinfo->guest_width);
-            SET_FIELD(start_info, shared_info, shared_info_frame<<PAGE_SHIFT, 
dinfo->guest_width);
-            SET_FIELD(start_info, flags, 0, dinfo->guest_width);
-            if ( GET_FIELD(start_info, store_mfn, dinfo->guest_width) > 
dinfo->p2m_size )
-            {
-                ERROR("Suspend record xenstore frame number is bad");
-                munmap(start_info, PAGE_SIZE);
-                goto out;
-            }
-            *store_mfn = ctx->p2m[GET_FIELD(start_info, store_mfn, 
dinfo->guest_width)];
-            SET_FIELD(start_info, store_mfn, *store_mfn, dinfo->guest_width);
-            SET_FIELD(start_info, store_evtchn, store_evtchn, 
dinfo->guest_width);
-            if ( GET_FIELD(start_info, console.domU.mfn, dinfo->guest_width) > 
dinfo->p2m_size )
-            {
-                ERROR("Suspend record console frame number is bad");
-                munmap(start_info, PAGE_SIZE);
-                goto out;
-            }
-            *console_mfn = ctx->p2m[GET_FIELD(start_info, console.domU.mfn, 
dinfo->guest_width)];
-            SET_FIELD(start_info, console.domU.mfn, *console_mfn, 
dinfo->guest_width);
-            SET_FIELD(start_info, console.domU.evtchn, console_evtchn, 
dinfo->guest_width);
-            munmap(start_info, PAGE_SIZE);
-        }
-        /* Uncanonicalise each GDT frame number. */
-        if ( GET_FIELD(ctxt, gdt_ents, dinfo->guest_width) > 8192 )
-        {
-            ERROR("GDT entry count out of range");
-            goto out;
-        }
-
-        for ( j = 0; (512*j) < GET_FIELD(ctxt, gdt_ents, dinfo->guest_width); 
j++ )
-        {
-            pfn = GET_FIELD(ctxt, gdt_frames[j], dinfo->guest_width);
-            if ( (pfn >= dinfo->p2m_size) ||
-                 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
-            {
-                ERROR("GDT frame number %i (0x%lx) is bad", 
-                      j, (unsigned long)pfn);
-                goto out;
-            }
-            SET_FIELD(ctxt, gdt_frames[j], ctx->p2m[pfn], dinfo->guest_width);
-        }
-        /* Uncanonicalise the page table base pointer. */
-        pfn = UNFOLD_CR3(GET_FIELD(ctxt, ctrlreg[3], dinfo->guest_width));
-
-        if ( pfn >= dinfo->p2m_size )
-        {
-            ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
-                  pfn, dinfo->p2m_size, pfn_type[pfn]);
-            goto out;
-        }
-
-        if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
-             ((unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
-        {
-            ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
-                  pfn, dinfo->p2m_size, pfn_type[pfn],
-                  (unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
-            goto out;
-        }
-        SET_FIELD(ctxt, ctrlreg[3], FOLD_CR3(ctx->p2m[pfn]), 
dinfo->guest_width);
-
-        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
-        if ( (ctx->pt_levels == 4) && (ctxt->x64.ctrlreg[1] & 1) )
-        {
-            pfn = UNFOLD_CR3(ctxt->x64.ctrlreg[1] & ~1);
-            if ( pfn >= dinfo->p2m_size )
-            {
-                ERROR("User PT base is bad: pfn=%lu p2m_size=%lu",
-                      pfn, dinfo->p2m_size);
-                goto out;
-            }
-            if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
-                 ((unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) 
)
-            {
-                ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
-                      pfn, dinfo->p2m_size, pfn_type[pfn],
-                      (unsigned 
long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
-                goto out;
-            }
-            ctxt->x64.ctrlreg[1] = FOLD_CR3(ctx->p2m[pfn]);
-        }
-        frc = xc_vcpu_setcontext(xch, dom, i, ctxt);
-        if ( frc != 0 )
-        {
-            PERROR("Couldn't build vcpu%d", i);
-            goto out;
-        }
-
-        if ( !ext_vcpucontext )
-            goto vcpu_ext_state_restore;
-        memcpy(&domctl.u.ext_vcpucontext, vcpup, 128);
-        vcpup += 128;
-        domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
-        domctl.domain = dom;
-        frc = xc_domctl(xch, &domctl);
-        if ( frc != 0 )
-        {
-            PERROR("Couldn't set extended vcpu%d info", i);
-            goto out;
-        }
-
- vcpu_ext_state_restore:
-        if ( !vcpuextstate_size )
-            continue;
-
-        memcpy(&domctl.u.vcpuextstate.xfeature_mask, vcpup,
-               sizeof(domctl.u.vcpuextstate.xfeature_mask));
-        vcpup += sizeof(domctl.u.vcpuextstate.xfeature_mask);
-        memcpy(&domctl.u.vcpuextstate.size, vcpup,
-               sizeof(domctl.u.vcpuextstate.size));
-        vcpup += sizeof(domctl.u.vcpuextstate.size);
-
-        buffer = xc_hypercall_buffer_alloc(xch, buffer,
-                                           domctl.u.vcpuextstate.size);
-        if ( !buffer )
-        {
-            PERROR("Could not allocate buffer to restore eXtended States");
-            goto out;
-        }
-        memcpy(buffer, vcpup, domctl.u.vcpuextstate.size);
-        vcpup += domctl.u.vcpuextstate.size;
-
-        domctl.cmd = XEN_DOMCTL_setvcpuextstate;
-        domctl.domain = dom;
-        domctl.u.vcpuextstate.vcpu = i;
-        set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);
-        frc = xc_domctl(xch, &domctl);
-        if ( frc != 0 )
-        {
-            PERROR("Couldn't set eXtended States for vcpu%d", i);
-            goto out;
-        }
-        xc_hypercall_buffer_free(xch, buffer);
-    }
-
-    memcpy(shared_info_page, tailbuf.u.pv.shared_info_page, PAGE_SIZE);
-
-    DPRINTF("Completed checkpoint load\n");
-
-    /* Restore contents of shared-info page. No checking needed. */
-    new_shared_info = xc_map_foreign_range(
-        xch, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
-    if ( new_shared_info == NULL )
-    {
-        PERROR("xc_map_foreign_range failed (for new_shared_info)");
-        goto out;
-    }
-
-    /* restore saved vcpu_info and arch specific info */
-    MEMCPY_FIELD(new_shared_info, old_shared_info, vcpu_info, 
dinfo->guest_width);
-    MEMCPY_FIELD(new_shared_info, old_shared_info, arch, dinfo->guest_width);
-
-    /* clear any pending events and the selector */
-    MEMSET_ARRAY_FIELD(new_shared_info, evtchn_pending, 0, dinfo->guest_width);
-    for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
-           SET_FIELD(new_shared_info, vcpu_info[i].evtchn_pending_sel, 0, 
dinfo->guest_width);
-
-    /* mask event channels */
-    MEMSET_ARRAY_FIELD(new_shared_info, evtchn_mask, 0xff, dinfo->guest_width);
-
-    /* leave wallclock time. set by hypervisor */
-    munmap(new_shared_info, PAGE_SIZE);
-
-    /* Uncanonicalise the pfn-to-mfn table frame-number list. */
-    for ( i = 0; i < P2M_FL_ENTRIES; i++ )
-    {
-        pfn = p2m_frame_list[i];
-        if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != 
XEN_DOMCTL_PFINFO_NOTAB) )
-        {
-            ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
-            goto out;
-        }
-        p2m_frame_list[i] = ctx->p2m[pfn];
-    }
-
-    /* Copy the P2M we've constructed to the 'live' P2M */
-    if ( !(ctx->live_p2m = xc_map_foreign_pages(xch, dom, PROT_WRITE,
-                                           p2m_frame_list, P2M_FL_ENTRIES)) )
-    {
-        PERROR("Couldn't map p2m table");
-        goto out;
-    }
-
-    /* If the domain we're restoring has a different word size to ours,
-     * we need to adjust the live_p2m assignment appropriately */
-    if ( dinfo->guest_width > sizeof (xen_pfn_t) )
-        for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
-            ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
-    else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
-        for ( i = 0; i < dinfo->p2m_size; i++ )   
-            ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
-    else
-        memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t));
-    munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
-
-    frc = xc_dom_gnttab_seed(xch, dom, *console_mfn, *store_mfn,
-                             console_domid, store_domid);
-    if (frc != 0)
-    {
-        ERROR("error seeding grant table");
-        goto out;
-    }
-
-    DPRINTF("Domain ready to be built.\n");
-    rc = 0;
-    goto out;
-
-  finish_hvm:
-    if ( tdata.data != NULL )
-    {
-        if ( callbacks != NULL && callbacks->toolstack_restore != NULL )
-        {
-            frc = callbacks->toolstack_restore(dom, tdata.data, tdata.len,
-                                               callbacks->data);
-            free(tdata.data);
-            if ( frc < 0 )
-            {
-                PERROR("error calling toolstack_restore");
-                goto out;
-            }
-        } else {
-            rc = -1;
-            ERROR("toolstack data available but no callback provided\n");
-            free(tdata.data);
-            goto out;
-        }
-    }
-
-    /* Dump the QEMU state to a state file for QEMU to load */
-    if ( dump_qemu(xch, dom, &tailbuf.u.hvm) ) {
-        PERROR("Error dumping QEMU state to file");
-        goto out;
-    }
-
-    /* These comms pages need to be zeroed at the start of day */
-    if ( xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[0]) ||
-         xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[1]) ||
-         xc_clear_domain_page(xch, dom, tailbuf.u.hvm.magicpfns[2]) )
-    {
-        PERROR("error zeroing magic pages");
-        goto out;
-    }
-
-    if ( (frc = xc_hvm_param_set(xch, dom,
-                                 HVM_PARAM_IOREQ_PFN, 
tailbuf.u.hvm.magicpfns[0]))
-         || (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_BUFIOREQ_PFN, 
tailbuf.u.hvm.magicpfns[1]))
-         || (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_STORE_PFN, 
tailbuf.u.hvm.magicpfns[2]))
-         || (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_PAE_ENABLED, pae))
-         || (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_STORE_EVTCHN,
-                                    store_evtchn))
-         || (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_CONSOLE_EVTCHN,
-                                    console_evtchn)) )
-    {
-        PERROR("error setting HVM params: %i", frc);
-        goto out;
-    }
-    *store_mfn = tailbuf.u.hvm.magicpfns[2];
-
-    if ( console_pfn ) {
-        if ( xc_clear_domain_page(xch, dom, console_pfn) ) {
-            PERROR("error zeroing console page");
-            goto out;
-        }
-        if ( (frc = xc_hvm_param_set(xch, dom,
-                                    HVM_PARAM_CONSOLE_PFN, console_pfn)) ) {
-            PERROR("error setting HVM param: %i", frc);
-            goto out;
-        }
-        *console_mfn = console_pfn;
-    }
-
-    frc = xc_domain_hvm_setcontext(xch, dom, tailbuf.u.hvm.hvmbuf,
-                                   tailbuf.u.hvm.reclen);
-    if ( frc )
-    {
-        PERROR("error setting the HVM context");
-        goto out;
-    }
-
-    frc = xc_dom_gnttab_hvm_seed(xch, dom, *console_mfn, *store_mfn,
-                                 console_domid, store_domid);
-    if (frc != 0)
-    {
-        ERROR("error seeding grant table");
-        goto out;
-    }
-
-    /* HVM success! */
-    rc = 0;
-
- out:
-    if ( (rc != 0) && (dom != 0) )
-        xc_domain_destroy(xch, dom);
-    xc_hypercall_buffer_free(xch, ctxt);
-    free(mmu);
-    free(ctx->p2m);
-    free(pfn_type);
-    free(region_mfn);
-    free(ctx->p2m_batch);
-    pagebuf_free(&pagebuf);
-    tailbuf_free(&tailbuf);
-
-    /* discard cache for save file  */
-    discard_file_cache(xch, io_fd, 1 /*flush*/);
-
-    fcntl(io_fd, F_SETFL, orig_io_fd_flags);
-
-    DPRINTF("Restore exit of domid %u with rc=%d\n", dom, rc);
-
-    return rc;
-}
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
deleted file mode 100644
index 3222473..0000000
--- a/tools/libxc/xc_domain_save.c
+++ /dev/null
@@ -1,2198 +0,0 @@
-/******************************************************************************
- * xc_linux_save.c
- *
- * Save the state of a running Linux session.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  
USA
- *
- * Copyright (c) 2003, K A Fraser.
- */
-
-#include <inttypes.h>
-#include <time.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <assert.h>
-
-#include "xc_private.h"
-#include "xc_bitops.h"
-#include "xc_dom.h"
-#include "xg_private.h"
-#include "xg_save_restore.h"
-
-#include <xen/hvm/params.h>
-
-/*
-** Default values for important tuning parameters. Can override by passing
-** non-zero replacement values to xc_domain_save().
-**
-** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
-**
-*/
-#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
-#define DEF_MAX_FACTOR   3   /* never send more than 3x p2m_size  */
-
-struct save_ctx {
-    unsigned long hvirt_start; /* virtual starting address of the hypervisor */
-    unsigned int pt_levels; /* #levels of page tables used by the current 
guest */
-    unsigned long max_mfn; /* max mfn of the whole machine */
-    xen_pfn_t *live_p2m; /* Live mapping of the table mapping each PFN to its 
current MFN. */
-    xen_pfn_t *live_m2p; /* Live mapping of system MFN to PFN table. */
-    unsigned long m2p_mfn0;
-    struct domain_info_context dinfo;
-};
-
-/* buffer for output */
-struct outbuf {
-    void* buf;
-    size_t size;
-    size_t pos;
-    int write_count;
-};
-
-#define OUTBUF_SIZE (16384 * 1024)
-
-/* grep fodder: machine_to_phys */
-
-#define mfn_to_pfn(_mfn)  (ctx->live_m2p[(_mfn)])
-
-#define pfn_to_mfn(_pfn)                                            \
-  ((xen_pfn_t) ((dinfo->guest_width==8)                               \
-                ? (((uint64_t *)ctx->live_p2m)[(_pfn)])                  \
-                : ((((uint32_t *)ctx->live_p2m)[(_pfn)]) == 0xffffffffU  \
-                   ? (-1UL) : (((uint32_t *)ctx->live_p2m)[(_pfn)]))))
-
-/*
- * Returns TRUE if the given machine frame number has a unique mapping
- * in the guest's pseudophysical map.
- */
-#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)          \
-    (((_mfn) < (ctx->max_mfn)) &&                \
-     ((mfn_to_pfn(_mfn) < (dinfo->p2m_size)) &&   \
-      (pfn_to_mfn(mfn_to_pfn(_mfn)) == (_mfn))))
-
-#define SUPERPAGE_PFN_SHIFT  9
-#define SUPERPAGE_NR_PFNS    (1UL << SUPERPAGE_PFN_SHIFT)
-
-#define SUPER_PAGE_START(pfn)    (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 )
-
-static uint64_t tv_to_us(struct timeval *new)
-{
-    return (new->tv_sec * 1000000) + new->tv_usec;
-}
-
-static uint64_t llgettimeofday(void)
-{
-    struct timeval now;
-    gettimeofday(&now, NULL);
-    return tv_to_us(&now);
-}
-
-static uint64_t tv_delta(struct timeval *new, struct timeval *old)
-{
-    return (((new->tv_sec - old->tv_sec)*1000000) +
-            (new->tv_usec - old->tv_usec));
-}
-
-static int noncached_write(xc_interface *xch,
-                           struct outbuf* ob,
-                           int fd, void *buffer, int len) 
-{
-    int rc = (write_exact(fd, buffer, len) == 0) ? len : -1;
-
-    ob->write_count += len;
-    if ( ob->write_count >= (MAX_PAGECACHE_USAGE * PAGE_SIZE) )
-    {
-        /* Time to discard cache - dont care if this fails */
-        int saved_errno = errno;
-        discard_file_cache(xch, fd, 0 /* no flush */);
-        errno = saved_errno;
-        ob->write_count = 0;
-    }
-
-    return rc;
-}
-
-static int outbuf_init(xc_interface *xch, struct outbuf* ob, size_t size)
-{
-    memset(ob, 0, sizeof(*ob));
-
-    if ( !(ob->buf = malloc(size)) ) {
-        DPRINTF("error allocating output buffer of size %zu\n", size);
-        return -1;
-    }
-
-    ob->size = size;
-
-    return 0;
-}
-
-static int outbuf_free(struct outbuf *ob)
-{
-    free(ob->buf);
-    ob->buf = NULL;
-    return 0;
-}
-
-static inline int outbuf_write(xc_interface *xch,
-                               struct outbuf* ob, void* buf, size_t len)
-{
-    if ( len > ob->size - ob->pos ) {
-        errno = ERANGE;
-        DBGPRINTF("outbuf_write: %zu > %zu@%zu\n", len, ob->size - ob->pos, 
ob->pos);
-        return -1;
-    }
-
-    memcpy(ob->buf + ob->pos, buf, len);
-    ob->pos += len;
-
-    return 0;
-}
-
-/* prep for nonblocking I/O */
-static int outbuf_flush(xc_interface *xch, struct outbuf* ob, int fd)
-{
-    int rc;
-    int cur = 0;
-
-    if ( !ob->pos )
-        return 0;
-
-    rc = write(fd, ob->buf, ob->pos);
-    while (rc < 0 || cur + rc < ob->pos) {
-        if (rc < 0 && errno != EAGAIN && errno != EINTR) {
-            DPRINTF("error flushing output: %d\n", errno);
-            return -1;
-        }
-        if (rc > 0)
-            cur += rc;
-
-        rc = write(fd, ob->buf + cur, ob->pos - cur);
-    }
-
-    ob->pos = 0;
-
-    return 0;
-}
-
-/* if there's no room in the buffer, flush it and try again. */
-static inline int outbuf_hardwrite(xc_interface *xch,
-                                   struct outbuf* ob, int fd, void* buf,
-                                   size_t len)
-{
-    if ( !len )
-        return 0;
-
-    if ( !outbuf_write(xch, ob, buf, len) )
-        return 0;
-
-    if ( outbuf_flush(xch, ob, fd) < 0 )
-        return -1;
-
-    return outbuf_write(xch, ob, buf, len);
-}
-
-/* start buffering output once we've reached checkpoint mode. */
-static inline int write_buffer(xc_interface *xch,
-                               int dobuf, struct outbuf* ob, int fd, void* buf,
-                               size_t len)
-{
-    if ( dobuf )
-        return outbuf_hardwrite(xch, ob, fd, buf, len);
-    else
-        return write_exact(fd, buf, len);
-}
-
-/* like write_buffer for noncached, which returns number of bytes written */
-static inline int write_uncached(xc_interface *xch,
-                                   int dobuf, struct outbuf* ob, int fd,
-                                   void* buf, size_t len)
-{
-    if ( dobuf )
-        return outbuf_hardwrite(xch, ob, fd, buf, len) ? -1 : len;
-    else
-        return noncached_write(xch, ob, fd, buf, len);
-}
-
-static int write_compressed(xc_interface *xch, comp_ctx *compress_ctx,
-                            int dobuf, struct outbuf* ob, int fd)
-{
-    int rc = 0;
-    int header = sizeof(int) + sizeof(unsigned long);
-    int marker = XC_SAVE_ID_COMPRESSED_DATA;
-    unsigned long compbuf_len = 0;
-
-    for(;;)
-    {
-        /* check for available space (atleast 8k) */
-        if ((ob->pos + header + XC_PAGE_SIZE * 2) > ob->size)
-        {
-            if (outbuf_flush(xch, ob, fd) < 0)
-            {
-                ERROR("Error when flushing outbuf intermediate");
-                return -1;
-            }
-        }
-
-        rc = xc_compression_compress_pages(xch, compress_ctx,
-                                           ob->buf + ob->pos + header,
-                                           ob->size - ob->pos - header,
-                                           &compbuf_len);
-        if (!rc)
-            break;
-
-        if (outbuf_hardwrite(xch, ob, fd, &marker, sizeof(marker)) < 0)
-        {
-            PERROR("Error when writing marker (errno %d)", errno);
-            return -1;
-        }
-
-        if (outbuf_hardwrite(xch, ob, fd, &compbuf_len, sizeof(compbuf_len)) < 
0)
-        {
-            PERROR("Error when writing compbuf_len (errno %d)", errno);
-            return -1;
-        }
-
-        ob->pos += (size_t) compbuf_len;
-        if (!dobuf && outbuf_flush(xch, ob, fd) < 0)
-        {
-            ERROR("Error when writing compressed chunk");
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-struct time_stats {
-    struct timeval wall;
-    long long d0_cpu, d1_cpu;
-};
-
-static int print_stats(xc_interface *xch, uint32_t domid, int pages_sent,
-                       struct time_stats *last,
-                       xc_shadow_op_stats_t *stats, int print)
-{
-    struct time_stats now;
-
-    gettimeofday(&now.wall, NULL);
-
-    now.d0_cpu = xc_domain_get_cpu_usage(xch, 0, /* FIXME */ 0)/1000;
-    now.d1_cpu = xc_domain_get_cpu_usage(xch, domid, /* FIXME */ 0)/1000;
-
-    if ( (now.d0_cpu == -1) || (now.d1_cpu == -1) )
-        DPRINTF("ARRHHH!!\n");
-
-    if ( print )
-    {
-        long long wall_delta;
-        long long d0_cpu_delta;
-        long long d1_cpu_delta;
-
-        wall_delta = tv_delta(&now.wall,&last->wall)/1000;
-        if ( wall_delta == 0 )
-            wall_delta = 1;
-
-        d0_cpu_delta = (now.d0_cpu - last->d0_cpu)/1000;
-        d1_cpu_delta = (now.d1_cpu - last->d1_cpu)/1000;
-
-        DPRINTF("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
-                "dirtied %dMb/s %" PRId32 " pages\n",
-                wall_delta,
-                (int)((d0_cpu_delta*100)/wall_delta),
-                (int)((d1_cpu_delta*100)/wall_delta),
-                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
-                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
-                stats->dirty_count);
-    }
-
-    *last = now;
-
-    return 0;
-}
-
-
-static int analysis_phase(xc_interface *xch, uint32_t domid, struct save_ctx 
*ctx,
-                          xc_hypercall_buffer_t *arr, int runs)
-{
-    long long start, now;
-    xc_shadow_op_stats_t stats;
-    int j;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    start = llgettimeofday();
-
-    for ( j = 0; j < runs; j++ )
-    {
-        int i;
-
-        xc_shadow_control(xch, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
-                          arr, dinfo->p2m_size, NULL, 0, NULL);
-        DPRINTF("#Flush\n");
-        for ( i = 0; i < 40; i++ )
-        {
-            usleep(50000);
-            now = llgettimeofday();
-            xc_shadow_control(xch, domid, XEN_DOMCTL_SHADOW_OP_PEEK,
-                              NULL, 0, NULL, 0, &stats);
-            DPRINTF("now= %lld faults= %"PRId32" dirty= %"PRId32"\n",
-                    ((now-start)+500)/1000,
-                    stats.fault_count, stats.dirty_count);
-        }
-    }
-
-    return -1;
-}
-
-static int suspend_and_state(int (*suspend)(void*), void* data,
-                             xc_interface *xch, int io_fd, int dom,
-                             xc_dominfo_t *info)
-{
-    if ( !(*suspend)(data) )
-    {
-        ERROR("Suspend request failed");
-        return -1;
-    }
-
-    if ( (xc_domain_getinfo(xch, dom, 1, info) != 1) ||
-         !info->shutdown || (info->shutdown_reason != SHUTDOWN_suspend) )
-    {
-        ERROR("Domain not in suspended state");
-        return -1;
-    }
-
-    return 0;
-}
-
-/*
-** Map the top-level page of MFNs from the guest. The guest might not have
-** finished resuming from a previous restore operation, so we wait a while for
-** it to update the MFN to a reasonable value.
-*/
-static void *map_frame_list_list(xc_interface *xch, uint32_t dom,
-                                 struct save_ctx *ctx,
-                                 shared_info_any_t *shinfo)
-{
-    int count = 100;
-    void *p;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-    uint64_t fll = GET_FIELD(shinfo, arch.pfn_to_mfn_frame_list_list, dinfo->guest_width);
-
-    while ( count-- && (fll == 0) )
-    {
-        usleep(10000);
-        fll = GET_FIELD(shinfo, arch.pfn_to_mfn_frame_list_list, dinfo->guest_width);
-    }
-
-    if ( fll == 0 )
-    {
-        ERROR("Timed out waiting for frame list updated.");
-        return NULL;
-    }
-
-    p = xc_map_foreign_range(xch, dom, PAGE_SIZE, PROT_READ, fll);
-    if ( p == NULL )
-        PERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
-
-    return p;
-}
-
-/*
-** During transfer (or in the state file), all page-table pages must be
-** converted into a 'canonical' form where references to actual mfns
-** are replaced with references to the corresponding pfns.
-**
-** This function performs the appropriate conversion, taking into account
-** which entries do not require canonicalization (in particular, those
-** entries which map the virtual address reserved for the hypervisor).
-*/
-static int canonicalize_pagetable(struct save_ctx *ctx,
-                           unsigned long type, unsigned long pfn,
-                           const void *spage, void *dpage)
-{
-    struct domain_info_context *dinfo = &ctx->dinfo;
-    int i, pte_last, xen_start, xen_end, race = 0; 
-    uint64_t pte;
-
-    /*
-    ** We need to determine which entries in this page table hold
-    ** reserved hypervisor mappings. This depends on the current
-    ** page table type as well as the number of paging levels.
-    */
-    xen_start = xen_end = pte_last = PAGE_SIZE / 8;
-
-    if ( (ctx->pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L3TAB) )
-        xen_start = L3_PAGETABLE_ENTRIES_PAE;
-
-    /*
-    ** In PAE only the L2 mapping the top 1GB contains Xen mappings.
-    ** We can spot this by looking for the guest's mappingof the m2p.
-    ** Guests must ensure that this check will fail for other L2s.
-    */
-    if ( (ctx->pt_levels == 3) && (type == XEN_DOMCTL_PFINFO_L2TAB) )
-    {
-        int hstart;
-        uint64_t he;
-
-        hstart = (ctx->hvirt_start >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
-        he = ((const uint64_t *) spage)[hstart];
-
-        if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == ctx->m2p_mfn0 )
-        {
-            /* hvirt starts with xen stuff... */
-            xen_start = hstart;
-        }
-        else if ( ctx->hvirt_start != 0xf5800000 )
-        {
-            /* old L2s from before hole was shrunk... */
-            hstart = (0xf5800000 >> L2_PAGETABLE_SHIFT_PAE) & 0x1ff;
-            he = ((const uint64_t *) spage)[hstart];
-            if ( ((he >> PAGE_SHIFT) & MFN_MASK_X86) == ctx->m2p_mfn0 )
-                xen_start = hstart;
-        }
-    }
-
-    if ( (ctx->pt_levels == 4) && (type == XEN_DOMCTL_PFINFO_L4TAB) )
-    {
-        /*
-        ** XXX SMH: should compute these from hvirt_start (which we have)
-        ** and hvirt_end (which we don't)
-        */
-        xen_start = 256;
-        xen_end   = 272;
-    }
-
-    /* Now iterate through the page table, canonicalizing each PTE */
-    for (i = 0; i < pte_last; i++ )
-    {
-        unsigned long pfn, mfn;
-
-        pte = ((const uint64_t*)spage)[i];
-
-        if ( (i >= xen_start) && (i < xen_end) )
-            pte = 0;
-
-        if ( pte & _PAGE_PRESENT )
-        {
-            mfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
-            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
-            {
-                /* This will happen if the type info is stale which
-                   is quite feasible under live migration */
-                pfn  = 0;  /* zap it - we'll retransmit this page later */
-                /* XXX: We can't spot Xen mappings in compat-mode L2es 
-                 * from 64-bit tools, but the only thing in them is the
-                 * compat m2p, so we quietly zap them.  This doesn't
-                 * count as a race, so don't report it. */
-                if ( !(type == XEN_DOMCTL_PFINFO_L2TAB 
-                       && sizeof (unsigned long) > dinfo->guest_width) )
-                     race = 1;  /* inform the caller; fatal if !live */ 
-            }
-            else
-                pfn = mfn_to_pfn(mfn);
-
-            pte &= ~MADDR_MASK_X86;
-            pte |= (uint64_t)pfn << PAGE_SHIFT;
-
-            /*
-             * PAE guest L3Es can contain these flags when running on
-             * a 64bit hypervisor. We zap these here to avoid any
-             * surprise at restore time...
-             */
-            if ( (ctx->pt_levels == 3) &&
-                 (type == XEN_DOMCTL_PFINFO_L3TAB) &&
-                 (pte & (_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED)) )
-                pte &= ~(_PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);
-        }
-
-        ((uint64_t*)dpage)[i] = pte;
-    }
-
-    return race;
-}
-
-xen_pfn_t *xc_map_m2p(xc_interface *xch,
-                                 unsigned long max_mfn,
-                                 int prot,
-                                 unsigned long *mfn0)
-{
-    privcmd_mmap_entry_t *entries;
-    unsigned long m2p_chunks, m2p_size;
-    xen_pfn_t *m2p;
-    xen_pfn_t *extent_start;
-    int i;
-
-    m2p = NULL;
-    m2p_size   = M2P_SIZE(max_mfn);
-    m2p_chunks = M2P_CHUNKS(max_mfn);
-
-    extent_start = calloc(m2p_chunks, sizeof(xen_pfn_t));
-    if ( !extent_start )
-    {
-        ERROR("failed to allocate space for m2p mfns");
-        goto err0;
-    }
-
-    if ( xc_machphys_mfn_list(xch, m2p_chunks, extent_start) )
-    {
-        PERROR("xc_get_m2p_mfns");
-        goto err1;
-    }
-
-    entries = calloc(m2p_chunks, sizeof(privcmd_mmap_entry_t));
-    if (entries == NULL)
-    {
-        ERROR("failed to allocate space for mmap entries");
-        goto err1;
-    }
-
-    for ( i = 0; i < m2p_chunks; i++ )
-        entries[i].mfn = extent_start[i];
-
-    m2p = xc_map_foreign_ranges(xch, DOMID_XEN,
-                       m2p_size, prot, M2P_CHUNK_SIZE,
-                       entries, m2p_chunks);
-    if (m2p == NULL)
-    {
-        PERROR("xc_mmap_foreign_ranges failed");
-        goto err2;
-    }
-
-    if (mfn0)
-        *mfn0 = entries[0].mfn;
-
-err2:
-    free(entries);
-err1:
-    free(extent_start);
-
-err0:
-    return m2p;
-}
-
-
-static xen_pfn_t *map_and_save_p2m_table(xc_interface *xch, 
-                                         int io_fd, 
-                                         uint32_t dom,
-                                         struct save_ctx *ctx,
-                                         shared_info_any_t *live_shinfo)
-{
-    vcpu_guest_context_any_t ctxt;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    /* Double and single indirect references to the live P2M table */
-    void *live_p2m_frame_list_list = NULL;
-    void *live_p2m_frame_list = NULL;
-
-    /* Copies of the above. */
-    xen_pfn_t *p2m_frame_list_list = NULL;
-    xen_pfn_t *p2m_frame_list = NULL;
-
-    /* The mapping of the live p2m table itself */
-    xen_pfn_t *p2m = NULL;
-
-    int i, success = 0;
-
-    live_p2m_frame_list_list = map_frame_list_list(xch, dom, ctx,
-                                                   live_shinfo);
-    if ( !live_p2m_frame_list_list )
-        goto out;
-
-    /* Get a local copy of the live_P2M_frame_list_list */
-    if ( !(p2m_frame_list_list = malloc(PAGE_SIZE)) )
-    {
-        ERROR("Couldn't allocate p2m_frame_list_list array");
-        goto out;
-    }
-    memcpy(p2m_frame_list_list, live_p2m_frame_list_list, PAGE_SIZE);
-
-    /* Canonicalize guest's unsigned long vs ours */
-    if ( dinfo->guest_width > sizeof(unsigned long) )
-        for ( i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++ )
-            if ( i < PAGE_SIZE/dinfo->guest_width )
-                p2m_frame_list_list[i] = ((uint64_t *)p2m_frame_list_list)[i];
-            else
-                p2m_frame_list_list[i] = 0;
-    else if ( dinfo->guest_width < sizeof(unsigned long) )
-        for ( i = PAGE_SIZE/sizeof(unsigned long) - 1; i >= 0; i-- )
-            p2m_frame_list_list[i] = ((uint32_t *)p2m_frame_list_list)[i];
-
-    live_p2m_frame_list =
-        xc_map_foreign_pages(xch, dom, PROT_READ,
-                             p2m_frame_list_list,
-                             P2M_FLL_ENTRIES);
-    if ( !live_p2m_frame_list )
-    {
-        PERROR("Couldn't map p2m_frame_list");
-        goto out;
-    }
-
-    /* Get a local copy of the live_P2M_frame_list */
-    if ( !(p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) )
-    {
-        ERROR("Couldn't allocate p2m_frame_list array");
-        goto out;
-    }
-    memset(p2m_frame_list, 0, P2M_TOOLS_FL_SIZE);
-    memcpy(p2m_frame_list, live_p2m_frame_list, P2M_GUEST_FL_SIZE);
-
-    munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
-    live_p2m_frame_list = NULL;
-
-    /* Canonicalize guest's unsigned long vs ours */
-    if ( dinfo->guest_width > sizeof(unsigned long) )
-        for ( i = 0; i < P2M_FL_ENTRIES; i++ )
-            p2m_frame_list[i] = ((uint64_t *)p2m_frame_list)[i];
-    else if ( dinfo->guest_width < sizeof(unsigned long) )
-        for ( i = P2M_FL_ENTRIES - 1; i >= 0; i-- )
-            p2m_frame_list[i] = ((uint32_t *)p2m_frame_list)[i];
-
-
-    /* Map all the frames of the pfn->mfn table. For migrate to succeed,
-       the guest must not change which frames are used for this purpose.
-       (its not clear why it would want to change them, and we'll be OK
-       from a safety POV anyhow. */
-
-    p2m = xc_map_foreign_pages(xch, dom, PROT_READ,
-                               p2m_frame_list,
-                               P2M_FL_ENTRIES);
-    if ( !p2m )
-    {
-        PERROR("Couldn't map p2m table");
-        goto out;
-    }
-    ctx->live_p2m = p2m; /* So that translation macros will work */
-    
-    /* Canonicalise the pfn-to-mfn table frame-number list. */
-    for ( i = 0; i < dinfo->p2m_size; i += FPP )
-    {
-        if ( !MFN_IS_IN_PSEUDOPHYS_MAP(p2m_frame_list[i/FPP]) )
-        {
-            ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
-            ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64", max 0x%lx",
-                  i, i/FPP, (uint64_t)p2m_frame_list[i/FPP], ctx->max_mfn);
-            if ( p2m_frame_list[i/FPP] < ctx->max_mfn ) 
-            {
-                ERROR("m2p[0x%"PRIx64"] = 0x%"PRIx64, 
-                      (uint64_t)p2m_frame_list[i/FPP],
-                      (uint64_t)ctx->live_m2p[p2m_frame_list[i/FPP]]);
-                ERROR("p2m[0x%"PRIx64"] = 0x%"PRIx64, 
-                      (uint64_t)ctx->live_m2p[p2m_frame_list[i/FPP]],
-                      (uint64_t)p2m[ctx->live_m2p[p2m_frame_list[i/FPP]]]);
-
-            }
-            goto out;
-        }
-        p2m_frame_list[i/FPP] = mfn_to_pfn(p2m_frame_list[i/FPP]);
-    }
-
-    if ( xc_vcpu_getcontext(xch, dom, 0, &ctxt) )
-    {
-        PERROR("Could not get vcpu context");
-        goto out;
-    }
-
-    /*
-     * Write an extended-info structure to inform the restore code that
-     * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off
-     * slow paths in the restore code.
-     */
-    {
-        unsigned long signature = ~0UL;
-        uint32_t chunk1_sz = ((dinfo->guest_width==8) 
-                              ? sizeof(ctxt.x64) 
-                              : sizeof(ctxt.x32));
-        uint32_t chunk2_sz = 0;
-        uint32_t chunk3_sz = 4;
-        uint32_t xcnt_size = 0;
-        uint32_t tot_sz;
-        DECLARE_DOMCTL;
-
-        domctl.cmd = XEN_DOMCTL_getvcpuextstate;
-        domctl.domain = dom;
-        domctl.u.vcpuextstate.vcpu = 0;
-        domctl.u.vcpuextstate.size = 0;
-        domctl.u.vcpuextstate.xfeature_mask = 0;
-        if ( xc_domctl(xch, &domctl) < 0 )
-        {
-            PERROR("No extended context for VCPU%d", i);
-            goto out;
-        }
-        xcnt_size = domctl.u.vcpuextstate.size + 2 * sizeof(uint64_t);
-
-        tot_sz = (chunk1_sz + 8) + (chunk2_sz + 8);
-        if ( domctl.u.vcpuextstate.xfeature_mask )
-            tot_sz += chunk3_sz + 8;
-
-        if ( write_exact(io_fd, &signature, sizeof(signature)) ||
-             write_exact(io_fd, &tot_sz, sizeof(tot_sz)) ||
-             write_exact(io_fd, "vcpu", 4) ||
-             write_exact(io_fd, &chunk1_sz, sizeof(chunk1_sz)) ||
-             write_exact(io_fd, &ctxt, chunk1_sz) ||
-             write_exact(io_fd, "extv", 4) ||
-             write_exact(io_fd, &chunk2_sz, sizeof(chunk2_sz)) ||
-             (domctl.u.vcpuextstate.xfeature_mask) ?
-                (write_exact(io_fd, "xcnt", 4) ||
-                write_exact(io_fd, &chunk3_sz, sizeof(chunk3_sz)) ||
-                write_exact(io_fd, &xcnt_size, 4)) :
-                0 )
-        {
-            PERROR("write: extended info");
-            goto out;
-        }
-    }
-
-    if ( write_exact(io_fd, p2m_frame_list, 
-                     P2M_FL_ENTRIES * sizeof(xen_pfn_t)) )
-    {
-        PERROR("write: p2m_frame_list");
-        goto out;
-    }
-
-    success = 1;
-
- out:
-    
-    if ( !success && p2m )
-        munmap(p2m, P2M_FL_ENTRIES * PAGE_SIZE);
-
-    if ( live_p2m_frame_list_list )
-        munmap(live_p2m_frame_list_list, PAGE_SIZE);
-
-    if ( live_p2m_frame_list )
-        munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
-
-    free(p2m_frame_list_list);
-
-    free(p2m_frame_list);
-
-    return success ? p2m : NULL;
-}
-
-/* must be done AFTER suspend_and_state() */
-static int save_tsc_info(xc_interface *xch, uint32_t dom, int io_fd)
-{
-    int marker = XC_SAVE_ID_TSC_INFO;
-    uint32_t tsc_mode, khz, incarn;
-    uint64_t nsec;
-
-    if ( xc_domain_get_tsc_info(xch, dom, &tsc_mode,
-                                &nsec, &khz, &incarn) < 0  ||
-         write_exact(io_fd, &marker, sizeof(marker)) ||
-         write_exact(io_fd, &tsc_mode, sizeof(tsc_mode)) ||
-         write_exact(io_fd, &nsec, sizeof(nsec)) ||
-         write_exact(io_fd, &khz, sizeof(khz)) ||
-         write_exact(io_fd, &incarn, sizeof(incarn)) )
-        return -1;
-    return 0;
-}
-
-int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iters,
-                   uint32_t max_factor, uint32_t flags,
-                   struct save_callbacks* callbacks, int hvm)
-{
-    xc_dominfo_t info;
-    DECLARE_DOMCTL;
-
-    int rc, frc, i, j, last_iter = 0, iter = 0;
-    int live  = (flags & XCFLAGS_LIVE);
-    int debug = (flags & XCFLAGS_DEBUG);
-    int superpages = !!hvm;
-    int race = 0, skip_this_iter = 0;
-    unsigned int sent_this_iter = 0;
-    int tmem_saved = 0;
-
-    /* The new domain's shared-info frame number. */
-    unsigned long shared_info_frame;
-
-    /* A copy of the CPU context of the guest. */
-    vcpu_guest_context_any_t ctxt;
-
-    /* A table containing the type of each PFN (/not/ MFN!). */
-    xen_pfn_t *pfn_type = NULL;
-    unsigned long *pfn_batch = NULL;
-    int *pfn_err = NULL;
-
-    /* A copy of one frame of guest memory. */
-    char page[PAGE_SIZE];
-
-    /* Live mapping of shared info structure */
-    shared_info_any_t *live_shinfo = NULL;
-
-    /* base of the region in which domain memory is mapped */
-    unsigned char *region_base = NULL;
-
-    /* A copy of the CPU eXtended States of the guest. */
-    DECLARE_HYPERCALL_BUFFER(void, buffer);
-
-    /* bitmap of pages:
-       - that should be sent this iteration (unless later marked as skip);
-       - to skip this iteration because already dirty;
-       - to fixup by sending at the end if not already resent; */
-    DECLARE_HYPERCALL_BUFFER(unsigned long, to_skip);
-    DECLARE_HYPERCALL_BUFFER(unsigned long, to_send);
-    unsigned long *to_fix = NULL;
-
-    struct time_stats time_stats;
-    xc_shadow_op_stats_t shadow_stats;
-
-    unsigned long needed_to_fix = 0;
-    unsigned long total_sent    = 0;
-
-    uint64_t vcpumap[XC_SR_MAX_VCPUS/64] = { 1ULL };
-
-    /* HVM: a buffer for holding HVM context */
-    uint32_t hvm_buf_size = 0;
-    uint8_t *hvm_buf = NULL;
-
-    /* HVM: magic frames for ioreqs and xenstore comms. */
-    uint64_t magic_pfns[3]; /* ioreq_pfn, bufioreq_pfn, store_pfn */
-
-    unsigned long mfn;
-
-    /* Without checkpoint compression, the dirty pages, pfn arrays
-     * and tailbuf (vcpu ctx, shared info page, etc.)  are written
-     * directly to outbuf. All of this is done while the domain is
-     * suspended.
-     *
-     * When checkpoint compression is enabled, the dirty pages are
-     * buffered, compressed "after" the domain is resumed and then
-     * written to outbuf. Since tailbuf data are collected while a
-     * domain is suspended, they cannot be directly written to the
-     * outbuf as there is no dirty page data preceeding tailbuf.
-     *
-     * So,two output buffers are maintained. Tailbuf data goes into
-     * ob_tailbuf. The dirty pages are compressed after resuming the
-     * domain and written to ob_pagebuf. ob_tailbuf is then appended
-     * to ob_pagebuf and finally flushed out.
-     */
-    struct outbuf ob_pagebuf, ob_tailbuf, *ob = NULL;
-    struct save_ctx _ctx;
-    struct save_ctx *ctx = &_ctx;
-    struct domain_info_context *dinfo = &ctx->dinfo;
-
-    /* Compression context */
-    comp_ctx *compress_ctx= NULL;
-    /* Even if XCFLAGS_CHECKPOINT_COMPRESS is set, we enable compression only
-     * after sending XC_SAVE_ID_ENABLE_COMPRESSION and the tailbuf for
-     * first time.
-     */
-    int compressing = 0;
-
-    int completed = 0;
-
-    if ( getenv("XG_MIGRATION_V2") )
-    {
-        return xc_domain_save2(xch, io_fd, dom, max_iters,
-                               max_factor, flags, callbacks, hvm);
-    }
-
-    DPRINTF("%s: starting save of domid %u", __func__, dom);
-
-    if ( hvm && !callbacks->switch_qemu_logdirty )
-    {
-        ERROR("No switch_qemu_logdirty callback provided.");
-        errno = EINVAL;
-        goto exit;
-    }
-
-    outbuf_init(xch, &ob_pagebuf, OUTBUF_SIZE);
-
-    memset(ctx, 0, sizeof(*ctx));
-
-    /* If no explicit control parameters given, use defaults */
-    max_iters  = max_iters  ? : DEF_MAX_ITERS;
-    max_factor = max_factor ? : DEF_MAX_FACTOR;
-
-    if ( !get_platform_info(xch, dom,
-                            &ctx->max_mfn, &ctx->hvirt_start, &ctx->pt_levels, &dinfo->guest_width) )
-    {
-        ERROR("Unable to get platform info.");
-        goto exit;
-    }
-
-    if ( xc_domain_getinfo(xch, dom, 1, &info) != 1 )
-    {
-        PERROR("Could not get domain info");
-        goto exit;
-    }
-
-    shared_info_frame = info.shared_info_frame;
-
-    /* Map the shared info frame */
-    if ( !hvm )
-    {
-        live_shinfo = xc_map_foreign_range(xch, dom, PAGE_SIZE,
-                                           PROT_READ, shared_info_frame);
-        if ( !live_shinfo )
-        {
-            PERROR("Couldn't map live_shinfo");
-            goto out;
-        }
-    }
-
-    /* Get the size of the P2M table */
-    if ( xc_domain_nr_gpfns(xch, dom, &dinfo->p2m_size) < 0 )
-    {
-        ERROR("Could not get maximum GPFN!");
-        goto out;
-    }
-
-    if ( dinfo->p2m_size > ~XEN_DOMCTL_PFINFO_LTAB_MASK )
-    {
-        errno = E2BIG;
-        ERROR("Cannot save this big a guest");
-        goto out;
-    }
-
-    /* Domain is still running at this point */
-    if ( live )
-    {
-        /* Live suspend. Enable log-dirty mode. */
-        if ( xc_shadow_control(xch, dom,
-                               XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
-                               NULL, 0, NULL, 0, NULL) < 0 )
-        {
-            /* log-dirty already enabled? There's no test op,
-               so attempt to disable then reenable it */
-            frc = xc_shadow_control(xch, dom, XEN_DOMCTL_SHADOW_OP_OFF,
-                                    NULL, 0, NULL, 0, NULL);
-            if ( frc >= 0 )
-            {
-                frc = xc_shadow_control(xch, dom,
-                                        XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
-                                        NULL, 0, NULL, 0, NULL);
-            }
-            
-            if ( frc < 0 )
-            {
-                PERROR("Couldn't enable shadow mode (rc %d) (errno %d)", frc, errno );
-                goto out;
-            }
-        }
-
-        /* Enable qemu-dm logging dirty pages to xen */
-        if ( hvm && callbacks->switch_qemu_logdirty(dom, 1, callbacks->data) )
-        {
-            PERROR("Couldn't enable qemu log-dirty mode (errno %d)", errno);
-            goto out;
-        }
-    }
-    else
-    {
-        /* This is a non-live suspend. Suspend the domain .*/
-        if ( suspend_and_state(callbacks->suspend, callbacks->data, xch,
-                               io_fd, dom, &info) )
-        {
-            ERROR("Domain appears not to have suspended");
-            goto out;
-        }
-    }
-
-    if ( flags & XCFLAGS_CHECKPOINT_COMPRESS )
-    {
-        if (!(compress_ctx = xc_compression_create_context(xch, dinfo->p2m_size)))
-        {
-            ERROR("Failed to create compression context");
-            goto out;
-        }
-        outbuf_init(xch, &ob_tailbuf, OUTBUF_SIZE/4);
-    }
-
-    last_iter = !live;
-
-    /* Setup to_send / to_fix and to_skip bitmaps */
-    to_send = xc_hypercall_buffer_alloc_pages(xch, to_send, NRPAGES(bitmap_size(dinfo->p2m_size)));
-    to_skip = xc_hypercall_buffer_alloc_pages(xch, to_skip, NRPAGES(bitmap_size(dinfo->p2m_size)));
-    to_fix  = calloc(1, bitmap_size(dinfo->p2m_size));
-
-    if ( !to_send || !to_fix || !to_skip )
-    {
-        errno = ENOMEM;
-        ERROR("Couldn't allocate to_send array");
-        goto out;
-    }
-
-    memset(to_send, 0xff, bitmap_size(dinfo->p2m_size));
-
-    if ( hvm )
-    {
-        /* Need another buffer for HVM context */
-        hvm_buf_size = xc_domain_hvm_getcontext(xch, dom, 0, 0);
-        if ( hvm_buf_size == -1 )
-        {
-            PERROR("Couldn't get HVM context size from Xen");
-            goto out;
-        }
-        hvm_buf = malloc(hvm_buf_size);
-        if ( !hvm_buf )
-        {
-            errno = ENOMEM;
-            ERROR("Couldn't allocate memory");
-            goto out;
-        }
-    }
-
-    analysis_phase(xch, dom, ctx, HYPERCALL_BUFFER(to_skip), 0);
-
-    pfn_type   = malloc(ROUNDUP(MAX_BATCH_SIZE * sizeof(*pfn_type), PAGE_SHIFT));
-    pfn_batch  = calloc(MAX_BATCH_SIZE, sizeof(*pfn_batch));
-    pfn_err    = malloc(MAX_BATCH_SIZE * sizeof(*pfn_err));
-    if ( (pfn_type == NULL) || (pfn_batch == NULL) || (pfn_err == NULL) )
-    {
-        ERROR("failed to alloc memory for pfn_type and/or pfn_batch arrays");
-        errno = ENOMEM;
-        goto out;
-    }
-    memset(pfn_type, 0,
-           ROUNDUP(MAX_BATCH_SIZE * sizeof(*pfn_type), PAGE_SHIFT));
-
-    /* Setup the mfn_to_pfn table mapping */
-    if ( !(ctx->live_m2p = xc_map_m2p(xch, ctx->max_mfn, PROT_READ, &ctx->m2p_mfn0)) )
-    {
-        PERROR("Failed to map live M2P table");
-        goto out;
-    }
-
-    /* Start writing out the saved-domain record. */
-    if ( write_exact(io_fd, &dinfo->p2m_size, sizeof(unsigned long)) )
-    {
-        PERROR("write: p2m_size");
-        goto out;
-    }
-
-    if ( !hvm )
-    {
-        int err = 0;
-
-        /* Map the P2M table, and write the list of P2M frames */
-        ctx->live_p2m = map_and_save_p2m_table(xch, io_fd, dom, ctx, live_shinfo);
-        if ( ctx->live_p2m == NULL )
-        {
-            PERROR("Failed to map/save the p2m frame list");
-            goto out;
-        }
-
-        /*
-         * Quick belt and braces sanity check.
-         */
-        
-        for ( i = 0; i < dinfo->p2m_size; i++ )
-        {
-            mfn = pfn_to_mfn(i);
-            if( (mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i) )
-            {
-                DPRINTF("i=0x%x mfn=%lx live_m2p=%lx\n", i,
-                        mfn, mfn_to_pfn(mfn));
-                err++;
-            }
-        }
-        DPRINTF("Had %d unexplained entries in p2m table\n", err);
-    }
-
-    print_stats(xch, dom, 0, &time_stats, &shadow_stats, 0);
-
-    tmem_saved = xc_tmem_save(xch, dom, io_fd, live, XC_SAVE_ID_TMEM);
-    if ( tmem_saved == -1 )
-    {
-        PERROR("Error when writing to state file (tmem)");
-        goto out;
-    }
-
-    if ( !live && save_tsc_info(xch, dom, io_fd) < 0 )
-    {
-        PERROR("Error when writing to state file (tsc)");
-        goto out;
-    }
-
-  copypages:
-#define wrexact(fd, buf, len) write_buffer(xch, last_iter, ob, (fd), (buf), (len))
-#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, ob, (fd), (buf), (len))
-#define wrcompressed(fd) write_compressed(xch, compress_ctx, last_iter, ob, (fd))
-
-    ob = &ob_pagebuf; /* Holds pfn_types, pages/compressed pages */
-    /* Now write out each data page, canonicalising page tables as we go... */
-    for ( ; ; )
-    {
-        unsigned int N, batch, run;
-        char reportbuf[80];
-
-        snprintf(reportbuf, sizeof(reportbuf),
-                 "Saving memory: iter %d (last sent %u skipped %u)",
-                 iter, sent_this_iter, skip_this_iter);
-
-        xc_set_progress_prefix(xch, reportbuf);
-        xc_report_progress_step(xch, 0, dinfo->p2m_size);
-
-        iter++;
-        sent_this_iter = 0;
-        skip_this_iter = 0;
-        N = 0;
-
-        while ( N < dinfo->p2m_size )
-        {
-            xc_report_progress_step(xch, N, dinfo->p2m_size);
-
-            if ( !last_iter )
-            {
-                /* Slightly wasteful to peek the whole array every time,
-                   but this is fast enough for the moment. */
-                frc = xc_shadow_control(
-                    xch, dom, XEN_DOMCTL_SHADOW_OP_PEEK, HYPERCALL_BUFFER(to_skip),
-                    dinfo->p2m_size, NULL, 0, NULL);
-                if ( frc != dinfo->p2m_size )
-                {
-                    ERROR("Error peeking shadow bitmap");
-                    goto out;
-                }
-            }
-
-            /* load pfn_type[] with the mfn of all the pages we're doing in
-               this batch. */
-            for  ( batch = 0;
-                   (batch < MAX_BATCH_SIZE) && (N < dinfo->p2m_size);
-                   N++ )
-            {
-                int n = N;
-
-                if ( debug )
-                {
-                    DPRINTF("%d pfn= %08lx mfn= %08lx %d",
-                            iter, (unsigned long)n,
-                            hvm ? 0 : pfn_to_mfn(n),
-                            test_bit(n, to_send));
-                    if ( !hvm && is_mapped(pfn_to_mfn(n)) )
-                        DPRINTF("  [mfn]= %08lx",
-                                mfn_to_pfn(pfn_to_mfn(n)&0xFFFFF));
-                    DPRINTF("\n");
-                }
-
-                if ( completed )
-                {
-                    /* for sparse bitmaps, word-by-word may save time */
-                    if ( !to_send[N >> ORDER_LONG] )
-                    {
-                        /* incremented again in for loop! */
-                        N += BITS_PER_LONG - 1;
-                        continue;
-                    }
-
-                    if ( !test_bit(n, to_send) )
-                        continue;
-
-                    pfn_batch[batch] = n;
-                    if ( hvm )
-                        pfn_type[batch] = n;
-                    else
-                        pfn_type[batch] = pfn_to_mfn(n);
-                }
-                else
-                {
-                    int dont_skip = (last_iter || (superpages && iter==1));
-
-                    if ( !dont_skip &&
-                         test_bit(n, to_send) &&
-                         test_bit(n, to_skip) )
-                        skip_this_iter++; /* stats keeping */
-
-                    if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
-                           (test_bit(n, to_send) && dont_skip) ||
-                           (test_bit(n, to_fix)  && last_iter)) )
-                        continue;
-
-                    /* First time through, try to keep superpages in the same batch */
-                    if ( superpages && iter == 1
-                         && SUPER_PAGE_START(n)
-                         && batch + SUPERPAGE_NR_PFNS > MAX_BATCH_SIZE )
-                        break;
-
-                    /*
-                    ** we get here if:
-                    **  1. page is marked to_send & hasn't already been re-dirtied
-                    **  2. (ignore to_skip in first and last iterations)
-                    **  3. add in pages that still need fixup (net bufs)
-                    */
-
-                    pfn_batch[batch] = n;
-
-                    /* Hypercall interfaces operate in PFNs for HVM guests
-                     * and MFNs for PV guests */
-                    if ( hvm )
-                        pfn_type[batch] = n;
-                    else
-                        pfn_type[batch] = pfn_to_mfn(n);
-                    
-                    if ( !is_mapped(pfn_type[batch]) )
-                    {
-                        /*
-                        ** not currently in psuedo-physical map -- set bit
-                        ** in to_fix since we must send this page in last_iter
-                        ** unless its sent sooner anyhow, or it never enters
-                        ** pseudo-physical map (e.g. for ballooned down doms)
-                        */
-                        set_bit(n, to_fix);
-                        continue;
-                    }
-                    
-                    if ( last_iter &&
-                         test_bit(n, to_fix) &&
-                         !test_bit(n, to_send) )
-                    {
-                        needed_to_fix++;
-                        DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
-                                iter, n, pfn_type[batch]);
-                    }
-
-                    clear_bit(n, to_fix);
-                }
-                
-                batch++;
-            }
-
-            if ( batch == 0 )
-                goto skip; /* vanishingly unlikely... */
-
-            region_base = xc_map_foreign_bulk(
-                xch, dom, PROT_READ, pfn_type, pfn_err, batch);
-            if ( region_base == NULL )
-            {
-                PERROR("map batch failed");
-                goto out;
-            }
-
-            /* Get page types */
-            if ( xc_get_pfn_type_batch(xch, dom, batch, pfn_type) )
-            {
-                PERROR("get_pfn_type_batch failed");
-                goto out;
-            }
-
-            for ( run = j = 0; j < batch; j++ )
-            {
-                unsigned long gmfn = pfn_batch[j];
-
-                if ( !hvm )
-                    gmfn = pfn_to_mfn(gmfn);
-
-                if ( pfn_type[j] == XEN_DOMCTL_PFINFO_BROKEN )
-                {
-                    pfn_type[j] |= pfn_batch[j];
-                    ++run;
-                    continue;
-                }
-
-                if ( pfn_err[j] )
-                {
-                    if ( pfn_type[j] == XEN_DOMCTL_PFINFO_XTAB )
-                        continue;
-
-                    DPRINTF("map fail: page %i mfn %08lx err %d\n",
-                            j, gmfn, pfn_err[j]);
-                    pfn_type[j] = XEN_DOMCTL_PFINFO_XTAB;
-                    continue;
-                }
-
-                if ( pfn_type[j] == XEN_DOMCTL_PFINFO_XTAB )
-                {
-                    DPRINTF("type fail: page %i mfn %08lx\n", j, gmfn);
-                    continue;
-                }
-
-                if ( superpages && iter==1 && test_bit(gmfn, to_skip))
-                    pfn_type[j] = XEN_DOMCTL_PFINFO_XALLOC;
-
-                /* canonicalise mfn->pfn */
-                pfn_type[j] |= pfn_batch[j];
-                ++run;
-
-                if ( debug )
-                {
-                    if ( hvm )
-                        DPRINTF("%d pfn=%08lx sum=%08lx\n",
-                                iter,
-                                pfn_type[j],
-                                csum_page(region_base + (PAGE_SIZE*j)));
-                    else
-                        DPRINTF("%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
-                                " sum= %08lx\n",
-                                iter,
-                                pfn_type[j],
-                                gmfn,
-                                mfn_to_pfn(gmfn),
-                                csum_page(region_base + (PAGE_SIZE*j)));
-                }
-            }
-
-            if ( !run )
-            {
-                munmap(region_base, batch*PAGE_SIZE);
-                continue; /* bail on this batch: no valid pages */
-            }
-
-            if ( wrexact(io_fd, &batch, sizeof(unsigned int)) )
-            {
-                PERROR("Error when writing to state file (2)");
-                goto out;
-            }
-
-            if ( sizeof(unsigned long) < sizeof(*pfn_type) )
-                for ( j = 0; j < batch; j++ )
-                    ((unsigned long *)pfn_type)[j] = pfn_type[j];
-            if ( wrexact(io_fd, pfn_type, sizeof(unsigned long)*batch) )
-            {
-                PERROR("Error when writing to state file (3)");
-                goto out;
-            }
-            if ( sizeof(unsigned long) < sizeof(*pfn_type) )
-                while ( --j >= 0 )
-                    pfn_type[j] = ((unsigned long *)pfn_type)[j];
-
-            /* entering this loop, pfn_type is now in pfns (Not mfns) */
-            run = 0;
-            for ( j = 0; j < batch; j++ )
-            {
-                unsigned long pfn, pagetype;
-                void *spage = (char *)region_base + (PAGE_SIZE*j);
-
-                pfn      = pfn_type[j] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
-                pagetype = pfn_type[j] &  XEN_DOMCTL_PFINFO_LTAB_MASK;
-
-                if ( pagetype != 0 )
-                {
-                    /* If the page is not a normal data page, write out any
-                       run of pages we may have previously acumulated */
-                    if ( !compressing && run )
-                    {
-                        if ( wruncached(io_fd, live,
-                                       (char*)region_base+(PAGE_SIZE*(j-run)), 
-                                       PAGE_SIZE*run) != PAGE_SIZE*run )
-                        {
-                            PERROR("Error when writing to state file (4a)"
-                                  " (errno %d)", errno);
-                            goto out;
-                        }                        
-                        run = 0;
-                    }
-                }
-
-                /*
-                 * skip pages that aren't present,
-                 * or are broken, or are alloc-only
-                 */
-                if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
-                    || pagetype == XEN_DOMCTL_PFINFO_BROKEN
-                    || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
-                    continue;
-
-                pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
-
-                if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
-                     (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
-                {
-                    /* We have a pagetable page: need to rewrite it. */
-                    race = 
-                        canonicalize_pagetable(ctx, pagetype, pfn, spage, 
page); 
-
-                    if ( race && !live )
-                    {
-                        ERROR("Fatal PT race (pfn %lx, type %08lx)", pfn,
-                              pagetype);
-                        goto out;
-                    }
-
-                    if (compressing)
-                    {
-                        int c_err;
-                        /* Mark pagetable page to be sent uncompressed */
-                        c_err = xc_compression_add_page(xch, compress_ctx, 
page,
-                                                        pfn, 1 /* raw page */);
-                        if (c_err == -2) /* OOB PFN */
-                        {
-                            ERROR("Could not add pagetable page "
-                                  "(pfn:%" PRIpfn "to page buffer\n", pfn);
-                            goto out;
-                        }
-
-                        if (c_err == -1)
-                        {
-                            /*
-                             * We are out of buffer space to hold dirty
-                             * pages. Compress and flush the current buffer
-                             * to make space. This is a corner case, that
-                             * slows down checkpointing as the compression
-                             * happens while domain is suspended. Happens
-                             * seldom and if you find this occuring
-                             * frequently, increase the PAGE_BUFFER_SIZE
-                             * in xc_compression.c.
-                             */
-                            if (wrcompressed(io_fd) < 0)
-                            {
-                                ERROR("Error when writing compressed"
-                                      " data (4b)\n");
-                                goto out;
-                            }
-                        }
-                    }
-                    else if ( wruncached(io_fd, live, page,
-                                         PAGE_SIZE) != PAGE_SIZE )
-                    {
-                        PERROR("Error when writing to state file (4b)"
-                              " (errno %d)", errno);
-                        goto out;
-                    }
-                }
-                else
-                {
-                    /* We have a normal page: accumulate it for writing. */
-                    if (compressing)
-                    {
-                        int c_err;
-                        /* For checkpoint compression, accumulate the page in 
the
-                         * page buffer, to be compressed later.
-                         */
-                        c_err = xc_compression_add_page(xch, compress_ctx, 
spage,
-                                                        pfn, 0 /* not raw page 
*/);
-
-                        if (c_err == -2) /* OOB PFN */
-                        {
-                            ERROR("Could not add page "
-                                  "(pfn:%" PRIpfn "to page buffer\n", pfn);
-                            goto out;
-                        }
-
-                        if (c_err == -1)
-                        {
-                            if (wrcompressed(io_fd) < 0)
-                            {
-                                ERROR("Error when writing compressed"
-                                      " data (4c)\n");
-                                goto out;
-                            }
-                        }
-                    }
-                    else
-                        run++;
-                }
-            } /* end of the write out for this batch */
-
-            if ( run )
-            {
-                /* write out the last accumulated run of pages */
-                if ( wruncached(io_fd, live,
-                               (char*)region_base+(PAGE_SIZE*(j-run)), 
-                               PAGE_SIZE*run) != PAGE_SIZE*run )
-                {
-                    PERROR("Error when writing to state file (4c)"
-                          " (errno %d)", errno);
-                    goto out;
-                }                        
-            }
-
-            sent_this_iter += batch;
-
-            munmap(region_base, batch*PAGE_SIZE);
-
-        } /* end of this while loop for this iteration */
-
-      skip:
-
-        xc_report_progress_step(xch, dinfo->p2m_size, dinfo->p2m_size);
-
-        total_sent += sent_this_iter;
-
-        if ( last_iter )
-        {
-            print_stats( xch, dom, sent_this_iter, &time_stats, &shadow_stats, 
1);
-
-            DPRINTF("Total pages sent= %ld (%.2fx)\n",
-                    total_sent, ((float)total_sent)/dinfo->p2m_size );
-            DPRINTF("(of which %ld were fixups)\n", needed_to_fix  );
-        }
-
-        if ( last_iter && debug )
-        {
-            int id = XC_SAVE_ID_ENABLE_VERIFY_MODE;
-            memset(to_send, 0xff, bitmap_size(dinfo->p2m_size));
-            debug = 0;
-            DPRINTF("Entering debug resend-all mode\n");
-
-            /* send "-1" to put receiver into debug mode */
-            if ( wrexact(io_fd, &id, sizeof(int)) )
-            {
-                PERROR("Error when writing to state file (6)");
-                goto out;
-            }
-
-            continue;
-        }
-
-        if ( last_iter )
-            break;
-
-        if ( live )
-        {
-            if ( (iter >= max_iters) ||
-                 (sent_this_iter+skip_this_iter < 50) ||
-                 (total_sent > dinfo->p2m_size*max_factor) )
-            {
-                DPRINTF("Start last iteration\n");
-                last_iter = 1;
-
-                if ( suspend_and_state(callbacks->suspend, callbacks->data,
-                                       xch, io_fd, dom, &info) )
-                {
-                    ERROR("Domain appears not to have suspended");
-                    goto out;
-                }
-
-                DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
-                if ( (tmem_saved > 0) &&
-                     (xc_tmem_save_extra(xch,dom,io_fd,XC_SAVE_ID_TMEM_EXTRA) 
== -1) )
-                {
-                        PERROR("Error when writing to state file (tmem)");
-                        goto out;
-                }
-
-                if ( save_tsc_info(xch, dom, io_fd) < 0 )
-                {
-                    PERROR("Error when writing to state file (tsc)");
-                    goto out;
-                }
-
-
-            }
-
-            if ( xc_shadow_control(xch, dom,
-                                   XEN_DOMCTL_SHADOW_OP_CLEAN, 
HYPERCALL_BUFFER(to_send),
-                                   dinfo->p2m_size, NULL, 0, &shadow_stats) != 
dinfo->p2m_size )
-            {
-                PERROR("Error flushing shadow PT");
-                goto out;
-            }
-
-            print_stats(xch, dom, sent_this_iter, &time_stats, &shadow_stats, 
1);
-
-        }
-    } /* end of infinite for loop */
-
-    DPRINTF("All memory is saved\n");
-
-    /* After last_iter, buffer the rest of pagebuf & tailbuf data into a
-     * separate output buffer and flush it after the compressed page chunks.
-     */
-    if (compressing)
-    {
-        ob = &ob_tailbuf;
-        ob->pos = 0;
-    }
-
-    {
-        struct chunk {
-            int id;
-            int max_vcpu_id;
-            uint64_t vcpumap[XC_SR_MAX_VCPUS/64];
-        } chunk = { XC_SAVE_ID_VCPU_INFO, info.max_vcpu_id };
-
-        if ( info.max_vcpu_id >= XC_SR_MAX_VCPUS )
-        {
-            errno = E2BIG;
-            ERROR("Too many VCPUS in guest!");
-            goto out;
-        }
-
-        for ( i = 1; i <= info.max_vcpu_id; i++ )
-        {
-            xc_vcpuinfo_t vinfo;
-            if ( (xc_vcpu_getinfo(xch, dom, i, &vinfo) == 0) &&
-                 vinfo.online )
-                vcpumap[i/64] |= 1ULL << (i%64);
-        }
-
-        memcpy(chunk.vcpumap, vcpumap, vcpumap_sz(info.max_vcpu_id));
-        if ( wrexact(io_fd, &chunk, offsetof(struct chunk, vcpumap)
-                     + vcpumap_sz(info.max_vcpu_id)) )
-        {
-            PERROR("Error when writing to state file");
-            goto out;
-        }
-    }
-
-    if ( hvm )
-    {
-        struct {
-            int id;
-            uint32_t pad;
-            uint64_t data;
-        } chunk = { 0, };
-
-        chunk.id = XC_SAVE_ID_HVM_GENERATION_ID_ADDR;
-        xc_hvm_param_get(xch, dom, HVM_PARAM_VM_GENERATION_ID_ADDR, 
&chunk.data);
-
-        if ( (chunk.data != 0) &&
-             wrexact(io_fd, &chunk, sizeof(chunk)) )
-        {
-            PERROR("Error when writing the generation id buffer location for 
guest");
-            goto out;
-        }
-
-        chunk.id = XC_SAVE_ID_HVM_IDENT_PT;
-        chunk.data = 0;
-        xc_hvm_param_get(xch, dom, HVM_PARAM_IDENT_PT, &chunk.data);
-
-        if ( (chunk.data != 0) &&
-             wrexact(io_fd, &chunk, sizeof(chunk)) )
-        {
-            PERROR("Error when writing the ident_pt for EPT guest");
-            goto out;
-        }
-
-        chunk.id = XC_SAVE_ID_HVM_PAGING_RING_PFN;
-        chunk.data = 0;
-        xc_hvm_param_get(xch, dom, HVM_PARAM_PAGING_RING_PFN, &chunk.data);
-
-        if ( (chunk.data != 0) &&
-             wrexact(io_fd, &chunk, sizeof(chunk)) )
-        {
-            PERROR("Error when writing the paging ring pfn for guest");
-            goto out;
-        }
-
-        chunk.id = XC_SAVE_ID_HVM_MONITOR_RING_PFN;
-        chunk.data = 0;
-        xc_hvm_param_get(xch, dom, HVM_PARAM_MONITOR_RING_PFN, &chunk.data);
-
-        if ( (chunk.data != 0) &&
-             wrexact(io_fd, &chunk, sizeof(chunk)) )
-        {
-            PERROR("Error when writing the access ring pfn for guest");
-            goto out;
-        }
-
-        chunk.id = XC_SAVE_ID_HVM_SHARING_RING_PFN;
-        chunk.data = 0;
-        xc_hvm_param_get(xch, dom, HVM_PARAM_SHARING_RING_PFN, &chunk.data);
-
-        if ( (chunk.data != 0) &&
-             wrexact(io_fd, &chunk, sizeof(chunk)) )
-        {
-            PERROR("Error when writing the sharing ring pfn for guest");
-            goto out;
-        }
-
-        chunk.id = XC_SAVE_ID_HVM_VM86_TSS;
-        chunk.data = 0;
-        xc_hvm_param_get(xch, dom, HVM_PARAM_VM86_TSS, &chunk.data);
-
-        if ( (chunk.data != 0) &&
-             wrexact(io_fd, &chunk, sizeof(chunk)) )
-        {
-            PERROR("Error when writing the vm86 TSS for guest");
-            goto out;
-        }
-
-        chunk.id = XC_SAVE_ID_HVM_CONSOLE_PFN;
-        chunk.data = 0;
-        xc_hvm_param_get(xch, dom, HVM_PARAM_CONSOLE_PFN, &chunk.data);
-
-        if ( (chunk.data != 0) &&
-             wrexact(io_fd, &chunk, sizeof(chunk)) )
-        {
-            PERROR("Error when writing the console pfn for guest");
-            goto out;
-        }
-
-        chunk.id = XC_SAVE_ID_HVM_ACPI_IOPORTS_LOCATION;
-        chunk.data = 0;
-        xc_hvm_param_get(xch, dom, HVM_PARAM_ACPI_IOPORTS_LOCATION, 
&chunk.data);
-
-        if ((chunk.data != 0) && wrexact(io_fd, &chunk, sizeof(chunk)))
-        {
-            PERROR("Error when writing the firmware ioport version");
-            goto out;
-        }
-
-        chunk.id = XC_SAVE_ID_HVM_VIRIDIAN;
-        chunk.data = 0;
-        xc_hvm_param_get(xch, dom, HVM_PARAM_VIRIDIAN, &chunk.data);
-
-        if ( (chunk.data != 0) &&
-             wrexact(io_fd, &chunk, sizeof(chunk)) )
-        {
-            PERROR("Error when writing the viridian flag");
-            goto out;
-        }
-
-        chunk.id = XC_SAVE_ID_HVM_IOREQ_SERVER_PFN;
-        chunk.data = 0;
-        xc_hvm_param_get(xch, dom, HVM_PARAM_IOREQ_SERVER_PFN, &chunk.data);
-
-        if ( (chunk.data != 0) &&
-             wrexact(io_fd, &chunk, sizeof(chunk)) )
-        {
-            PERROR("Error when writing the ioreq server gmfn base");
-            goto out;
-        }
-
-        chunk.id = XC_SAVE_ID_HVM_NR_IOREQ_SERVER_PAGES;
-        chunk.data = 0;
-        xc_hvm_param_get(xch, dom, HVM_PARAM_NR_IOREQ_SERVER_PAGES, 
&chunk.data);
-
-        if ( (chunk.data != 0) &&
-             wrexact(io_fd, &chunk, sizeof(chunk)) )
-        {
-            PERROR("Error when writing the ioreq server gmfn count");
-            goto out;
-        }
-    }
-
-    if ( callbacks != NULL && callbacks->toolstack_save != NULL )
-    {
-        int id = XC_SAVE_ID_TOOLSTACK;
-        uint8_t *buf;
-        uint32_t len;
-
-        if ( callbacks->toolstack_save(dom, &buf, &len, callbacks->data) < 0 )
-        {
-            PERROR("Error calling toolstack_save");
-            goto out;
-        }
-        wrexact(io_fd, &id, sizeof(id));
-        wrexact(io_fd, &len, sizeof(len));
-        wrexact(io_fd, buf, len);
-        free(buf);
-    }
-
-    if ( !callbacks->checkpoint )
-    {
-        /*
-         * If this is not a checkpointed save then this must be the first and
-         * last checkpoint.
-         */
-        i = XC_SAVE_ID_LAST_CHECKPOINT;
-        if ( wrexact(io_fd, &i, sizeof(int)) )
-        {
-            PERROR("Error when writing last checkpoint chunk");
-            goto out;
-        }
-    }
-
-    /* Enable compression logic on both sides by sending this
-     * one time marker.
-     * NOTE: We could have simplified this procedure by sending
-     * the enable/disable compression flag before the beginning of
-     * the main for loop. But this would break compatibility for
-     * live migration code, with older versions of xen. So we have
-     * to enable it after the last_iter, when the XC_SAVE_ID_*
-     * elements are sent.
-     */
-    if (!compressing && (flags & XCFLAGS_CHECKPOINT_COMPRESS))
-    {
-        i = XC_SAVE_ID_ENABLE_COMPRESSION;
-        if ( wrexact(io_fd, &i, sizeof(int)) )
-        {
-            PERROR("Error when writing enable_compression marker");
-            goto out;
-        }
-    }
-
-    /* Zero terminate */
-    i = 0;
-    if ( wrexact(io_fd, &i, sizeof(int)) )
-    {
-        PERROR("Error when writing to state file (6')");
-        goto out;
-    }
-
-    if ( hvm ) 
-    {
-        uint32_t rec_size;
-
-        /* Save magic-page locations. */
-        memset(magic_pfns, 0, sizeof(magic_pfns));
-        xc_hvm_param_get(xch, dom, HVM_PARAM_IOREQ_PFN, &magic_pfns[0]);
-        xc_hvm_param_get(xch, dom, HVM_PARAM_BUFIOREQ_PFN, &magic_pfns[1]);
-        xc_hvm_param_get(xch, dom, HVM_PARAM_STORE_PFN, &magic_pfns[2]);
-        if ( wrexact(io_fd, magic_pfns, sizeof(magic_pfns)) )
-        {
-            PERROR("Error when writing to state file (7)");
-            goto out;
-        }
-
-        /* Get HVM context from Xen and save it too */
-        if ( (rec_size = xc_domain_hvm_getcontext(xch, dom, hvm_buf, 
-                                                  hvm_buf_size)) == -1 )
-        {
-            PERROR("HVM:Could not get hvm buffer");
-            goto out;
-        }
-        
-        if ( wrexact(io_fd, &rec_size, sizeof(uint32_t)) )
-        {
-            PERROR("error write hvm buffer size");
-            goto out;
-        }
-        
-        if ( wrexact(io_fd, hvm_buf, rec_size) )
-        {
-            PERROR("write HVM info failed!");
-            goto out;
-        }
-        
-        /* HVM guests are done now */
-        goto success;
-    }
-
-    /* PV guests only from now on */
-
-    /* Send through a list of all the PFNs that were not in map at the close */
-    {
-        unsigned int i,j;
-        unsigned long pfntab[1024];
-
-        for ( i = 0, j = 0; i < dinfo->p2m_size; i++ )
-        {
-            if ( !is_mapped(pfn_to_mfn(i)) )
-                j++;
-        }
-
-        if ( wrexact(io_fd, &j, sizeof(unsigned int)) )
-        {
-            PERROR("Error when writing to state file (6a)");
-            goto out;
-        }
-
-        for ( i = 0, j = 0; i < dinfo->p2m_size; )
-        {
-            if ( !is_mapped(pfn_to_mfn(i)) )
-                pfntab[j++] = i;
-
-            i++;
-            if ( (j == 1024) || (i == dinfo->p2m_size) )
-            {
-                if ( wrexact(io_fd, &pfntab, sizeof(unsigned long)*j) )
-                {
-                    PERROR("Error when writing to state file (6b)");
-                    goto out;
-                }
-                j = 0;
-            }
-        }
-    }
-
-    if ( xc_vcpu_getcontext(xch, dom, 0, &ctxt) )
-    {
-        PERROR("Could not get vcpu context");
-        goto out;
-    }
-
-    /*
-     * Canonicalise the start info frame number.
-     *
-     * The start info MFN is the 3rd argument to the
-     * HYPERVISOR_sched_op hypercall when op==SCHEDOP_shutdown and
-     * reason==SHUTDOWN_suspend and is therefore found in the edx
-     * register.
-     */
-    mfn = GET_FIELD(&ctxt, user_regs.edx, dinfo->guest_width);
-    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
-    {
-        errno = ERANGE;
-        ERROR("Suspend record is not in range of pseudophys map");
-        goto out;
-    }
-    SET_FIELD(&ctxt, user_regs.edx, mfn_to_pfn(mfn), dinfo->guest_width);
-
-    for ( i = 0; i <= info.max_vcpu_id; i++ )
-    {
-        if ( !(vcpumap[i/64] & (1ULL << (i%64))) )
-            continue;
-
-        if ( (i != 0) && xc_vcpu_getcontext(xch, dom, i, &ctxt) )
-        {
-            PERROR("No context for VCPU%d", i);
-            goto out;
-        }
-
-        /* Canonicalise each GDT frame number. */
-        for ( j = 0; (512*j) < GET_FIELD(&ctxt, gdt_ents, dinfo->guest_width); 
j++ )
-        {
-            mfn = GET_FIELD(&ctxt, gdt_frames[j], dinfo->guest_width);
-            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
-            {
-                errno = ERANGE;
-                ERROR("GDT frame is not in range of pseudophys map");
-                goto out;
-            }
-            SET_FIELD(&ctxt, gdt_frames[j], mfn_to_pfn(mfn), 
dinfo->guest_width);
-        }
-
-        /* Canonicalise the page table base pointer. */
-        if ( !MFN_IS_IN_PSEUDOPHYS_MAP(
-                 UNFOLD_CR3(GET_FIELD(&ctxt, ctrlreg[3], dinfo->guest_width))) 
)
-        {
-            errno = ERANGE;
-            ERROR("PT base is not in range of pseudophys map");
-            goto out;
-        }
-        SET_FIELD(&ctxt, ctrlreg[3], 
-                  FOLD_CR3(mfn_to_pfn(UNFOLD_CR3(
-                                          GET_FIELD(&ctxt, ctrlreg[3], 
dinfo->guest_width)
-                                          ))), dinfo->guest_width);
-
-        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
-        if ( (ctx->pt_levels == 4) && ctxt.x64.ctrlreg[1] )
-        {
-            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(UNFOLD_CR3(ctxt.x64.ctrlreg[1])) )
-            {
-                errno = ERANGE;
-                ERROR("PT base is not in range of pseudophys map");
-                goto out;
-            }
-            /* Least-significant bit means 'valid PFN'. */
-            ctxt.x64.ctrlreg[1] = 1 |
-                FOLD_CR3(mfn_to_pfn(UNFOLD_CR3(ctxt.x64.ctrlreg[1])));
-        }
-
-        if ( wrexact(io_fd, &ctxt, ((dinfo->guest_width==8) 
-                                        ? sizeof(ctxt.x64) 
-                                        : sizeof(ctxt.x32))) )
-        {
-            PERROR("Error when writing to state file (1)");
-            goto out;
-        }
-
-        domctl.cmd = XEN_DOMCTL_get_ext_vcpucontext;
-        domctl.domain = dom;
-        memset(&domctl.u, 0, sizeof(domctl.u));
-        domctl.u.ext_vcpucontext.vcpu = i;
-        if ( xc_domctl(xch, &domctl) < 0 )
-        {
-            PERROR("No extended context for VCPU%d", i);
-            goto out;
-        }
-        if ( wrexact(io_fd, &domctl.u.ext_vcpucontext, 128) )
-        {
-            PERROR("Error when writing to state file (2)");
-            goto out;
-        }
-
-        /* Check there are no PV MSRs in use. */
-        domctl.cmd = XEN_DOMCTL_get_vcpu_msrs;
-        domctl.domain = dom;
-        memset(&domctl.u, 0, sizeof(domctl.u));
-        domctl.u.vcpu_msrs.vcpu = i;
-        domctl.u.vcpu_msrs.msr_count = 0;
-        set_xen_guest_handle_raw(domctl.u.vcpu_msrs.msrs, (void*)1);
-
-        if ( xc_domctl(xch, &domctl) < 0 )
-        {
-            if ( errno == ENOBUFS )
-            {
-                errno = EOPNOTSUPP;
-                PERROR("Unable to migrate PV guest using MSRs (yet)");
-            }
-            else
-                PERROR("Error querying maximum number of MSRs for VCPU%d", i);
-            goto out;
-        }
-
-        /* Start to fetch CPU eXtended States */
-        /* Get buffer size first */
-        domctl.cmd = XEN_DOMCTL_getvcpuextstate;
-        domctl.domain = dom;
-        domctl.u.vcpuextstate.vcpu = i;
-        domctl.u.vcpuextstate.xfeature_mask = 0;
-        domctl.u.vcpuextstate.size = 0;
-        if ( xc_domctl(xch, &domctl) < 0 )
-        {
-            PERROR("No eXtended states (XSAVE) for VCPU%d", i);
-            goto out;
-        }
-
-        if ( !domctl.u.vcpuextstate.xfeature_mask )
-            continue;
-
-        /* Getting eXtended states data */
-        buffer = xc_hypercall_buffer_alloc(xch, buffer, 
domctl.u.vcpuextstate.size);
-        if ( !buffer )
-        {
-            PERROR("Insufficient memory for getting eXtended states for"
-                   "VCPU%d", i);
-            goto out;
-        }
-        set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);
-        if ( xc_domctl(xch, &domctl) < 0 )
-        {
-            PERROR("No eXtended states (XSAVE) for VCPU%d", i);
-            xc_hypercall_buffer_free(xch, buffer);
-            goto out;
-        }
-
-        if ( wrexact(io_fd, &domctl.u.vcpuextstate.xfeature_mask,
-                     sizeof(domctl.u.vcpuextstate.xfeature_mask)) ||
-             wrexact(io_fd, &domctl.u.vcpuextstate.size,
-                     sizeof(domctl.u.vcpuextstate.size)) ||
-             wrexact(io_fd, buffer, domctl.u.vcpuextstate.size) )
-        {
-            PERROR("Error when writing to state file VCPU extended state");
-            xc_hypercall_buffer_free(xch, buffer);
-            goto out;
-        }
-        xc_hypercall_buffer_free(xch, buffer);
-    }
-
-    /*
-     * Reset the MFN to be a known-invalid value. See map_frame_list_list().
-     */
-    memcpy(page, live_shinfo, PAGE_SIZE);
-    SET_FIELD(((shared_info_any_t *)page), 
-              arch.pfn_to_mfn_frame_list_list, 0, dinfo->guest_width);
-    if ( wrexact(io_fd, page, PAGE_SIZE) )
-    {
-        PERROR("Error when writing to state file (1)");
-        goto out;
-    }
-
-    /* Flush last write and check for errors. */
-    if ( fsync(io_fd) && errno != EINVAL )
-    {
-        PERROR("Error when flushing state file");
-        goto out;
-    }
-
-    /* Success! */
- success:
-    rc = errno = 0;
-    goto out_rc;
-
- out:
-    rc = errno;
-    assert(rc);
- out_rc:
-    completed = 1;
-
-    if ( !rc && callbacks->postcopy )
-        callbacks->postcopy(callbacks->data);
-
-    /* guest has been resumed. Now we can compress data
-     * at our own pace.
-     */
-    if (!rc && compressing)
-    {
-        ob = &ob_pagebuf;
-        if (wrcompressed(io_fd) < 0)
-        {
-            ERROR("Error when writing compressed data, after postcopy\n");
-            goto out;
-        }
-        /* Append the tailbuf data to the main outbuf */
-        if ( wrexact(io_fd, ob_tailbuf.buf, ob_tailbuf.pos) )
-        {
-            PERROR("Error when copying tailbuf into outbuf");
-            goto out;
-        }
-    }
-
-    /* Flush last write and discard cache for file. */
-    if ( ob && outbuf_flush(xch, ob, io_fd) < 0 ) {
-        PERROR("Error when flushing output buffer");
-        if (!rc)
-            rc = errno;
-    }
-
-    discard_file_cache(xch, io_fd, 1 /* flush */);
-
-    /* Enable compression now, finally */
-    compressing = (flags & XCFLAGS_CHECKPOINT_COMPRESS);
-
-    /* checkpoint_cb can spend arbitrarily long in between rounds */
-    if (!rc && callbacks->checkpoint &&
-        callbacks->checkpoint(callbacks->data) > 0)
-    {
-        /* reset stats timer */
-        print_stats(xch, dom, 0, &time_stats, &shadow_stats, 0);
-
-        /* last_iter = 1; */
-        if ( suspend_and_state(callbacks->suspend, callbacks->data, xch,
-                               io_fd, dom, &info) )
-        {
-            ERROR("Domain appears not to have suspended");
-            goto out;
-        }
-        DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
-        print_stats(xch, dom, 0, &time_stats, &shadow_stats, 1);
-
-        if ( xc_shadow_control(xch, dom,
-                               XEN_DOMCTL_SHADOW_OP_CLEAN, 
HYPERCALL_BUFFER(to_send),
-                               dinfo->p2m_size, NULL, 0, &shadow_stats) != 
dinfo->p2m_size )
-        {
-            PERROR("Error flushing shadow PT");
-        }
-
-        goto copypages;
-    }
-
-    if ( tmem_saved != 0 && live )
-        xc_tmem_save_done(xch, dom);
-
-    if ( live )
-    {
-        if ( xc_shadow_control(xch, dom, 
-                               XEN_DOMCTL_SHADOW_OP_OFF,
-                               NULL, 0, NULL, 0, NULL) < 0 )
-            DPRINTF("Warning - couldn't disable shadow mode");
-        if ( hvm && callbacks->switch_qemu_logdirty(dom, 0, callbacks->data) )
-            DPRINTF("Warning - couldn't disable qemu log-dirty mode");
-    }
-
-    if (compress_ctx)
-        xc_compression_free_context(xch, compress_ctx);
-
-    if ( live_shinfo )
-        munmap(live_shinfo, PAGE_SIZE);
-
-    if ( ctx->live_p2m )
-        munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
-
-    if ( ctx->live_m2p )
-        munmap(ctx->live_m2p, M2P_SIZE(ctx->max_mfn));
-
-    xc_hypercall_buffer_free_pages(xch, to_send, 
NRPAGES(bitmap_size(dinfo->p2m_size)));
-    xc_hypercall_buffer_free_pages(xch, to_skip, 
NRPAGES(bitmap_size(dinfo->p2m_size)));
-
-    free(pfn_type);
-    free(pfn_batch);
-    free(pfn_err);
-    free(to_fix);
-    free(hvm_buf);
-    outbuf_free(&ob_pagebuf);
-
-    errno = rc;
-exit:
-    DPRINTF("Save exit of domid %u with errno=%d\n", dom, errno);
-
-    return !!errno;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xc_offline_page.c b/tools/libxc/xc_offline_page.c
index b1d169c..c2a8230 100644
--- a/tools/libxc/xc_offline_page.c
+++ b/tools/libxc/xc_offline_page.c
@@ -396,6 +396,65 @@ static int is_page_exchangable(xc_interface *xch, int 
domid, xen_pfn_t mfn,
     return 1;
 }
 
+xen_pfn_t *xc_map_m2p(xc_interface *xch,
+                      unsigned long max_mfn,
+                      int prot,
+                      unsigned long *mfn0)
+{
+    privcmd_mmap_entry_t *entries;
+    unsigned long m2p_chunks, m2p_size;
+    xen_pfn_t *m2p;
+    xen_pfn_t *extent_start;
+    int i;
+
+    m2p = NULL;
+    m2p_size   = M2P_SIZE(max_mfn);
+    m2p_chunks = M2P_CHUNKS(max_mfn);
+
+    extent_start = calloc(m2p_chunks, sizeof(xen_pfn_t));
+    if ( !extent_start )
+    {
+        ERROR("failed to allocate space for m2p mfns");
+        goto err0;
+    }
+
+    if ( xc_machphys_mfn_list(xch, m2p_chunks, extent_start) )
+    {
+        PERROR("xc_get_m2p_mfns");
+        goto err1;
+    }
+
+    entries = calloc(m2p_chunks, sizeof(privcmd_mmap_entry_t));
+    if (entries == NULL)
+    {
+        ERROR("failed to allocate space for mmap entries");
+        goto err1;
+    }
+
+    for ( i = 0; i < m2p_chunks; i++ )
+        entries[i].mfn = extent_start[i];
+
+    m2p = xc_map_foreign_ranges(xch, DOMID_XEN,
+                       m2p_size, prot, M2P_CHUNK_SIZE,
+                       entries, m2p_chunks);
+    if (m2p == NULL)
+    {
+        PERROR("xc_mmap_foreign_ranges failed");
+        goto err2;
+    }
+
+    if (mfn0)
+        *mfn0 = entries[0].mfn;
+
+err2:
+    free(entries);
+err1:
+    free(extent_start);
+
+err0:
+    return m2p;
+}
+
 /* The domain should be suspended when called here */
 int xc_exchange_page(xc_interface *xch, int domid, xen_pfn_t mfn)
 {
diff --git a/tools/libxc/xg_save_restore.h b/tools/libxc/xg_save_restore.h
index 57d4e8f..c2686af 100644
--- a/tools/libxc/xg_save_restore.h
+++ b/tools/libxc/xg_save_restore.h
@@ -22,248 +22,6 @@
 #include <xen/foreign/x86_64.h>
 
 /*
- * SAVE/RESTORE/MIGRATE PROTOCOL
- * =============================
- *
- * The general form of a stream of chunks is a header followed by a
- * body consisting of a variable number of chunks (terminated by a
- * chunk with type 0) followed by a trailer.
- *
- * For a rolling/checkpoint (e.g. remus) migration then the body and
- * trailer phases can be repeated until an external event
- * (e.g. failure) causes the process to terminate and commit to the
- * most recent complete checkpoint.
- *
- * HEADER
- * ------
- *
- * unsigned long        : p2m_size
- *
- * extended-info (PV-only, optional):
- *
- *   If first unsigned long == ~0UL then extended info is present,
- *   otherwise unsigned long is part of p2m. Note that p2m_size above
- *   does not include the length of the extended info.
- *
- *   extended-info:
- *
- *     unsigned long    : signature == ~0UL
- *     uint32_t                : number of bytes remaining in extended-info
- *
- *     1 or more extended-info blocks of form:
- *     char[4]          : block identifier
- *     uint32_t         : block data size
- *     bytes            : block data
- *
- *     defined extended-info blocks:
- *     "vcpu"          : VCPU context info containing vcpu_guest_context_t.
- *                        The precise variant of the context structure
- *                        (e.g. 32 vs 64 bit) is distinguished by
- *                        the block size.
- *     "extv"           : Presence indicates use of extended VCPU context in
- *                        tail, data size is 0.
- *
- * p2m (PV-only):
- *
- *   consists of p2m_size bytes comprising an array of xen_pfn_t sized entries.
- *
- * BODY PHASE - Format A (for live migration or Remus without compression)
- * ----------
- *
- * A series of chunks with a common header:
- *   int              : chunk type
- *
- * If the chunk type is +ve then chunk contains guest memory data, and the
- * type contains the number of pages in the batch:
- *
- *     unsigned long[]  : PFN array, length == number of pages in batch
- *                        Each entry consists of XEN_DOMCTL_PFINFO_*
- *                        in bits 31-28 and the PFN number in bits 27-0.
- *     page data        : PAGE_SIZE bytes for each page marked present in PFN
- *                        array
- *
- * If the chunk type is -ve then chunk consists of one of a number of
- * metadata types.  See definitions of XC_SAVE_ID_* below.
- *
- * If chunk type is 0 then body phase is complete.
- *
- *
- * BODY PHASE - Format B (for Remus with compression)
- * ----------
- *
- * A series of chunks with a common header:
- *   int              : chunk type
- *
- * If the chunk type is +ve then chunk contains array of PFNs corresponding
- * to guest memory and type contains the number of PFNs in the batch:
- *
- *     unsigned long[]  : PFN array, length == number of pages in batch
- *                        Each entry consists of XEN_DOMCTL_PFINFO_*
- *                        in bits 31-28 and the PFN number in bits 27-0.
- *
- * If the chunk type is -ve then chunk consists of one of a number of
- * metadata types.  See definitions of XC_SAVE_ID_* below.
- *
- * If the chunk type is -ve and equals XC_SAVE_ID_COMPRESSED_DATA, then the
- * chunk consists of compressed page data, in the following format:
- *
- *     unsigned long        : Size of the compressed chunk to follow
- *     compressed data :      variable length data of size indicated above.
- *                            This chunk consists of compressed page data.
- *                            The number of pages in one chunk depends on
- *                            the amount of space available in the sender's
- *                            output buffer.
- *
- * Format of compressed data:
- *   compressed_data = <deltas>*
- *   delta           = <marker, run*>
- *   marker          = (RUNFLAG|SKIPFLAG) bitwise-or RUNLEN [1 byte marker]
- *   RUNFLAG         = 0
- *   SKIPFLAG        = 1 << 7
- *   RUNLEN          = 7-bit unsigned value indicating number of WORDS in the run
- *   run             = string of bytes of length sizeof(WORD) * RUNLEN
- *
- *    If marker contains RUNFLAG, then RUNLEN * sizeof(WORD) bytes of data following
- *   the marker is copied into the target page at the appropriate offset indicated by
- *   the offset_ptr
- *    If marker contains SKIPFLAG, then the offset_ptr is advanced
- *   by RUNLEN * sizeof(WORD).
- *
- * If chunk type is 0 then body phase is complete.
- *
- * There can be one or more chunks with type XC_SAVE_ID_COMPRESSED_DATA,
- * containing compressed pages. The compressed chunks are collated to form
- * one single compressed chunk for the entire iteration. The number of pages
- * present in this final compressed chunk will be equal to the total number
- * of valid PFNs specified by the +ve chunks.
- *
- * At the sender side, compressed pages are inserted into the output stream
- * in the same order as they would have been if compression logic was absent.
- *
- * Until last iteration, the BODY is sent in Format A, to maintain live
- * migration compatibility with receivers of older Xen versions.
- * At the last iteration, if Remus compression was enabled, the sender sends
- * a trigger, XC_SAVE_ID_ENABLE_COMPRESSION to tell the receiver to parse the
- * BODY in Format B from the next iteration onwards.
- *
- * An example sequence of chunks received in Format B:
- *     +16                              +ve chunk
- *     unsigned long[16]                PFN array
- *     +100                             +ve chunk
- *     unsigned long[100]               PFN array
- *     +50                              +ve chunk
- *     unsigned long[50]                PFN array
- *
- *     XC_SAVE_ID_COMPRESSED_DATA       TAG
- *       N                              Length of compressed data
- *       N bytes of DATA                Decompresses to 166 pages
- *
- *     XC_SAVE_ID_*                     other xc save chunks
- *     0                                END BODY TAG
- *
- * Corner case with checkpoint compression:
- *     At sender side, after pausing the domain, dirty pages are usually
- *   copied out to a temporary buffer. After the domain is resumed,
- *   compression is done and the compressed chunk(s) are sent, followed by
- *   other XC_SAVE_ID_* chunks.
- *     If the temporary buffer gets full while scanning for dirty pages,
- *   the sender stops buffering of dirty pages, compresses the temporary
- *   buffer and sends the compressed data with XC_SAVE_ID_COMPRESSED_DATA.
- *   The sender then resumes the buffering of dirty pages and continues
- *   scanning for the dirty pages.
- *     For e.g., assume that the temporary buffer can hold 4096 pages and
- *   there are 5000 dirty pages. The following is the sequence of chunks
- *   that the receiver will see:
- *
- *     +1024                       +ve chunk
- *     unsigned long[1024]         PFN array
- *     +1024                       +ve chunk
- *     unsigned long[1024]         PFN array
- *     +1024                       +ve chunk
- *     unsigned long[1024]         PFN array
- *     +1024                       +ve chunk
- *     unsigned long[1024]         PFN array
- *
- *     XC_SAVE_ID_COMPRESSED_DATA  TAG
- *      N                          Length of compressed data
- *      N bytes of DATA            Decompresses to 4096 pages
- *
- *     +4                          +ve chunk
- *     unsigned long[4]            PFN array
- *
- *     XC_SAVE_ID_COMPRESSED_DATA  TAG
- *      M                          Length of compressed data
- *      M bytes of DATA            Decompresses to 4 pages
- *
- *     XC_SAVE_ID_*                other xc save chunks
- *     0                           END BODY TAG
- *
- *     In other words, XC_SAVE_ID_COMPRESSED_DATA can be interleaved with
- *   +ve chunks arbitrarily. But at the receiver end, the following condition
- *   always holds true until the end of BODY PHASE:
- *    num(PFN entries +ve chunks) >= num(pages received in compressed form)
- *
- * TAIL PHASE
- * ----------
- *
- * Content differs for PV and HVM guests.
- *
- * HVM TAIL:
- *
- *  "Magic" pages:
- *     uint64_t         : I/O req PFN
- *     uint64_t         : Buffered I/O req PFN
- *     uint64_t         : Store PFN
- *  Xen HVM Context:
- *     uint32_t         : Length of context in bytes
- *     bytes            : Context data
- *  Qemu context:
- *     char[21]         : Signature:
- *       "QemuDeviceModelRecord" : Read Qemu save data until EOF
- *       "DeviceModelRecord0002" : uint32_t length field followed by that many
- *                                 bytes of Qemu save data
- *       "RemusDeviceModelState" : Currently the same as "DeviceModelRecord0002".
- *
- * PV TAIL:
- *
- *  Unmapped PFN list   : list of all the PFNs that were not in map at the close
- *     unsigned int     : Number of unmapped pages
- *     unsigned long[]  : PFNs of unmapped pages
- *
- *  VCPU context data   : A series of VCPU records, one per present VCPU
- *                        Maximum and present map supplied in XC_SAVE_ID_VCPUINFO
- *     bytes:           : VCPU context structure. Size is determined by size
- *                        provided in extended-info header
- *     bytes[128]       : Extended VCPU context (present IFF "extv" block
- *                        present in extended-info header)
- *
- *  Shared Info Page    : 4096 bytes of shared info page
- */
-
-#define XC_SAVE_ID_ENABLE_VERIFY_MODE -1 /* Switch to validation phase. */
-#define XC_SAVE_ID_VCPU_INFO          -2 /* Additional VCPU info */
-#define XC_SAVE_ID_HVM_IDENT_PT       -3 /* (HVM-only) */
-#define XC_SAVE_ID_HVM_VM86_TSS       -4 /* (HVM-only) */
-#define XC_SAVE_ID_TMEM               -5
-#define XC_SAVE_ID_TMEM_EXTRA         -6
-#define XC_SAVE_ID_TSC_INFO           -7
-#define XC_SAVE_ID_HVM_CONSOLE_PFN    -8 /* (HVM-only) */
-#define XC_SAVE_ID_LAST_CHECKPOINT    -9 /* Commit to restoring after completion of current iteration. */
-#define XC_SAVE_ID_HVM_ACPI_IOPORTS_LOCATION -10
-#define XC_SAVE_ID_HVM_VIRIDIAN       -11
-#define XC_SAVE_ID_COMPRESSED_DATA    -12 /* Marker to indicate arrival of compressed data */
-#define XC_SAVE_ID_ENABLE_COMPRESSION -13 /* Marker to enable compression logic at receiver side */
-#define XC_SAVE_ID_HVM_GENERATION_ID_ADDR -14
-/* Markers for the pfn's hosting these mem event rings */
-#define XC_SAVE_ID_HVM_PAGING_RING_PFN  -15
-#define XC_SAVE_ID_HVM_MONITOR_RING_PFN -16
-#define XC_SAVE_ID_HVM_SHARING_RING_PFN -17
-#define XC_SAVE_ID_TOOLSTACK          -18 /* Optional toolstack specific info */
-/* These are a pair; it is an error for one to exist without the other */
-#define XC_SAVE_ID_HVM_IOREQ_SERVER_PFN -19
-#define XC_SAVE_ID_HVM_NR_IOREQ_SERVER_PAGES -20
-
-/*
 ** We process save/restore/migrate in batches of pages; the below
 ** determines how many pages we (at maximum) deal with in each batch.
 */
@@ -272,11 +30,6 @@
 /* When pinning page tables at the end of restore, we also use batching. */
 #define MAX_PIN_BATCH  1024
 
-/* Maximum #VCPUs currently supported for save/restore. */
-#define XC_SR_MAX_VCPUS 4096
-#define vcpumap_sz(max_id) (((max_id)/64+1)*sizeof(uint64_t))
-
-
 /*
 ** Determine various platform information required for save/restore, in
 ** particular:
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.