[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2] libxc: try to find last used pfn when migrating



For migration the last used pfn of a guest is needed to size the
logdirty bitmap and as an upper bound of the page loop. Unfortunately
there are pv-kernels advertising a much higher maximum pfn as they
are really using in order to support memory hotplug. This will lead
to allocation of much more memory in Xen tools during migration as
really needed.

Try to find the last used guest pfn of a pv-domu by scanning the p2m
tree from the last entry towards it's start and search for an entry
not being invalid.

Normally the mid pages of the p2m tree containing all invalid entries
are being reused, so we can just scan the top page for identical
entries and skip them but the first one.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
Changes in V2:
- Modified comments regarding setup callback in structures
  xc_sr_save_ops and xc_sr_restore_ops (suggested by Andrew).
- Got rid of calls of xc_domain_maximum_gpfn() especially in the pv
  case (suggested by Wei).

---
 tools/libxc/xc_sr_common.h        | 14 ++++++++-----
 tools/libxc/xc_sr_common_x86_pv.c | 19 +-----------------
 tools/libxc/xc_sr_save.c          | 24 ++++-------------------
 tools/libxc/xc_sr_save_x86_hvm.c  | 14 +++++++++++++
 tools/libxc/xc_sr_save_x86_pv.c   | 41 ++++++++++++++++++++++++++++++++++++---
 5 files changed, 66 insertions(+), 46 deletions(-)

diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index 64f6082..9aecde2 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -54,9 +54,11 @@ struct xc_sr_save_ops
                           void **page);
 
     /**
-     * Set up local environment to restore a domain.  This is called before
-     * any records are written to the stream.  (Typically querying running
-     * domain state, setting up mappings etc.)
+     * Set up local environment to save a domain. (Typically querying
+     * running domain state, setting up mappings etc.)
+     *
+     * This is called once before any common setup has occurred, allowing for
+     * guest-specific adjustments to be made to common state.
      */
     int (*setup)(struct xc_sr_context *ctx);
 
@@ -121,8 +123,10 @@ struct xc_sr_restore_ops
     int (*localise_page)(struct xc_sr_context *ctx, uint32_t type, void *page);
 
     /**
-     * Set up local environment to restore a domain.  This is called before
-     * any records are read from the stream.
+     * Set up local environment to restore a domain.
+     *
+     * This is called once before any common setup has occurred, allowing for
+     * guest-specific adjustments to be made to common state.
      */
     int (*setup)(struct xc_sr_context *ctx);
 
diff --git a/tools/libxc/xc_sr_common_x86_pv.c 
b/tools/libxc/xc_sr_common_x86_pv.c
index eb68c07..f233c87 100644
--- a/tools/libxc/xc_sr_common_x86_pv.c
+++ b/tools/libxc/xc_sr_common_x86_pv.c
@@ -68,8 +68,7 @@ uint64_t mfn_to_cr3(struct xc_sr_context *ctx, xen_pfn_t _mfn)
 int x86_pv_domain_info(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
-    unsigned int guest_width, guest_levels, fpp;
-    xen_pfn_t max_pfn;
+    unsigned int guest_width, guest_levels;
 
     /* Get the domain width */
     if ( xc_domain_get_guest_width(xch, ctx->domid, &guest_width) )
@@ -89,25 +88,9 @@ int x86_pv_domain_info(struct xc_sr_context *ctx)
     }
     ctx->x86_pv.width = guest_width;
     ctx->x86_pv.levels = guest_levels;
-    fpp = PAGE_SIZE / ctx->x86_pv.width;
 
     DPRINTF("%d bits, %d levels", guest_width * 8, guest_levels);
 
-    /* Get the domain's size */
-    if ( xc_domain_maximum_gpfn(xch, ctx->domid, &max_pfn) < 0 )
-    {
-        PERROR("Unable to obtain guests max pfn");
-        return -1;
-    }
-
-    if ( max_pfn > 0 )
-    {
-        ctx->x86_pv.max_pfn = max_pfn;
-        ctx->x86_pv.p2m_frames = (ctx->x86_pv.max_pfn + fpp) / fpp;
-
-        DPRINTF("max_pfn %#lx, p2m_frames %d", max_pfn, 
ctx->x86_pv.p2m_frames);
-    }
-
     return 0;
 }
 
diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c
index 0c12e56..cefcef5 100644
--- a/tools/libxc/xc_sr_save.c
+++ b/tools/libxc/xc_sr_save.c
@@ -677,6 +677,10 @@ static int setup(struct xc_sr_context *ctx)
     DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                     &ctx->save.dirty_bitmap_hbuf);
 
+    rc = ctx->save.ops.setup(ctx);
+    if ( rc )
+        goto err;
+
     dirty_bitmap = xc_hypercall_buffer_alloc_pages(
                    xch, dirty_bitmap, 
NRPAGES(bitmap_size(ctx->save.p2m_size)));
     ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
@@ -692,10 +696,6 @@ static int setup(struct xc_sr_context *ctx)
         goto err;
     }
 
-    rc = ctx->save.ops.setup(ctx);
-    if ( rc )
-        goto err;
-
     rc = 0;
 
  err:
@@ -824,7 +824,6 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t 
dom,
                    uint32_t max_iters, uint32_t max_factor, uint32_t flags,
                    struct save_callbacks* callbacks, int hvm)
 {
-    xen_pfn_t nr_pfns;
     struct xc_sr_context ctx =
         {
             .xch = xch,
@@ -869,21 +868,6 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t 
dom,
 
     ctx.domid = dom;
 
-    if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 )
-    {
-        PERROR("Unable to obtain the guest p2m size");
-        return -1;
-    }
-
-    ctx.save.p2m_size = nr_pfns;
-
-    if ( ctx.save.p2m_size > ~XEN_DOMCTL_PFINFO_LTAB_MASK )
-    {
-        errno = E2BIG;
-        ERROR("Cannot save this big a guest");
-        return -1;
-    }
-
     if ( ctx.dominfo.hvm )
     {
         ctx.save.ops = save_ops_x86_hvm;
diff --git a/tools/libxc/xc_sr_save_x86_hvm.c b/tools/libxc/xc_sr_save_x86_hvm.c
index cdee774..3c879ed 100644
--- a/tools/libxc/xc_sr_save_x86_hvm.c
+++ b/tools/libxc/xc_sr_save_x86_hvm.c
@@ -135,6 +135,20 @@ static int x86_hvm_normalise_page(struct xc_sr_context 
*ctx,
 static int x86_hvm_setup(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
+    xen_pfn_t nr_pfns;
+
+    if ( xc_domain_nr_gpfns(xch, ctx->domid, &nr_pfns) < 0 )
+    {
+        PERROR("Unable to obtain the guest p2m size");
+        return -1;
+    }
+    if ( nr_pfns > ~XEN_DOMCTL_PFINFO_LTAB_MASK )
+    {
+        PERROR("Cannot save this big a guest");
+        return -1;
+    }
+
+    ctx->save.p2m_size = nr_pfns;
 
     if ( ctx->save.callbacks->switch_qemu_logdirty(
              ctx->domid, 1, ctx->save.callbacks->data) )
diff --git a/tools/libxc/xc_sr_save_x86_pv.c b/tools/libxc/xc_sr_save_x86_pv.c
index f63f40b..c8d6f0b 100644
--- a/tools/libxc/xc_sr_save_x86_pv.c
+++ b/tools/libxc/xc_sr_save_x86_pv.c
@@ -83,8 +83,8 @@ static int map_p2m(struct xc_sr_context *ctx)
      */
     xc_interface *xch = ctx->xch;
     int rc = -1;
-    unsigned x, fpp, fll_entries, fl_entries;
-    xen_pfn_t fll_mfn;
+    unsigned x, saved_x, fpp, fll_entries, fl_entries;
+    xen_pfn_t fll_mfn, saved_mfn, max_pfn;
 
     xen_pfn_t *local_fll = NULL;
     void *guest_fll = NULL;
@@ -94,9 +94,15 @@ static int map_p2m(struct xc_sr_context *ctx)
     void *guest_fl = NULL;
     size_t local_fl_size;
 
+    ctx->x86_pv.max_pfn = GET_FIELD(ctx->x86_pv.shinfo, arch.max_pfn,
+                                    ctx->x86_pv.width) - 1;
     fpp = PAGE_SIZE / ctx->x86_pv.width;
     fll_entries = (ctx->x86_pv.max_pfn / (fpp * fpp)) + 1;
-    fl_entries  = (ctx->x86_pv.max_pfn / fpp) + 1;
+    if ( fll_entries > fpp )
+    {
+        ERROR("max_pfn %#lx too large for p2m tree", ctx->x86_pv.max_pfn);
+        goto err;
+    }
 
     fll_mfn = GET_FIELD(ctx->x86_pv.shinfo, arch.pfn_to_mfn_frame_list_list,
                         ctx->x86_pv.width);
@@ -131,6 +137,8 @@ static int map_p2m(struct xc_sr_context *ctx)
     }
 
     /* Check for bad mfns in frame list list. */
+    saved_mfn = 0;
+    saved_x = 0;
     for ( x = 0; x < fll_entries; ++x )
     {
         if ( local_fll[x] == 0 || local_fll[x] > ctx->x86_pv.max_mfn )
@@ -139,8 +147,35 @@ static int map_p2m(struct xc_sr_context *ctx)
                   local_fll[x], x, fll_entries);
             goto err;
         }
+        if ( local_fll[x] != saved_mfn )
+        {
+            saved_mfn = local_fll[x];
+            saved_x = x;
+        }
     }
 
+    /*
+     * Check for actual lower max_pfn:
+     * If the trailing entries of the frame list list were all the same we can
+     * assume they all reference mid pages all referencing p2m pages with all
+     * invalid entries. Otherwise there would be multiple pfns referencing all
+     * the same mfn which can't work across migration, as this sharing would be
+     * broken by the migration process.
+     * Adjust max_pfn if possible to avoid allocating much larger areas as
+     * needed for p2m and logdirty map.
+     */
+    max_pfn = (saved_x + 1) * fpp * fpp - 1;
+    if ( max_pfn < ctx->x86_pv.max_pfn )
+    {
+        ctx->x86_pv.max_pfn = max_pfn;
+        fll_entries = (ctx->x86_pv.max_pfn / (fpp * fpp)) + 1;
+    }
+    ctx->x86_pv.p2m_frames = (ctx->x86_pv.max_pfn + fpp) / fpp;
+    DPRINTF("max_pfn %#lx, p2m_frames %d", ctx->x86_pv.max_pfn,
+            ctx->x86_pv.p2m_frames);
+    ctx->save.p2m_size = ctx->x86_pv.max_pfn + 1;
+    fl_entries  = (ctx->x86_pv.max_pfn / fpp) + 1;
+
     /* Map the guest mid p2m frames. */
     guest_fl = xc_map_foreign_pages(xch, ctx->domid, PROT_READ,
                                     local_fll, fll_entries);
-- 
2.6.2


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.