[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] superpages lost after migration of HVM domU
On Wed, Apr 26, Andrew Cooper wrote: > On 26/04/17 16:43, Olaf Hering wrote: > > On Thu, Apr 20, Jan Beulich wrote: > > > >>>>> On 20.04.17 at 18:04, <olaf@xxxxxxxxx> wrote: > >>> On Thu, Apr 20, Andrew Cooper wrote: > >>> > >>>> As it currently stands, the sending side iterates from 0 to p2m_size, > >>>> and sends every frame on the first pass. This means we get PAGE_DATA > >>>> records linearly, in batches of 1024, or two aligned 2M superpages. > >>> Is there a way to preserve 1G pages? This 380G domU I'm looking at is > >>> built with 4k:461390 2M:2341 1G:365 pages. > >> I think we've hashed out a possible way to deal with this, by > >> speculatively allocating 1G pages as long as the allocation cap for > >> the domain allows, subsequently punching holes into those pages > >> if we can't allocate any new pages anymore (due to otherwise > >> overrunning the cap). > > The result is not pretty. This HVM-only approach appears to work for a > > domU with "memory=3024" and localhost migration. > > It is required to punch holes as soon as possible to avoid errors in > > xenforeignmemory_map due to "Over-allocation". Would be nice if the > > receiver gets a memory map upfront to avoid all stunts... > > Oh - I was about to start working on this. This is a pleasant surprise. :) Here is a variant that actually works for migration between two dom0s. --- a/tools/libxc/xc_sr_common.h +++ b/tools/libxc/xc_sr_common.h @@ -107,6 +107,9 @@ struct xc_sr_save_ops */ struct xc_sr_restore_ops { + /* Allocate a MFN for the given PFN */ + int (*allocate_pfn)(struct xc_sr_context *ctx, xen_pfn_t pfn); + /* Convert a PFN to GFN. May return ~0UL for an invalid mapping. 
*/ xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn); @@ -172,6 +175,52 @@ struct xc_sr_x86_pv_restore_vcpu size_t basicsz, extdsz, xsavesz, msrsz; }; +struct xc_sr_bitmap +{ + void *p; + unsigned long bits; +}; + +extern bool _xc_sr_bitmap_resize(struct xc_sr_bitmap *bm, unsigned long bits); +static inline bool xc_sr_bitmap_resize(struct xc_sr_bitmap *bm, unsigned long bits) +{ + if (bits > bm->bits) + return _xc_sr_bitmap_resize(bm, bits); + return true; +} + +static inline void xc_sr_bitmap_free(struct xc_sr_bitmap *bm) +{ + free(bm->p); +} + +static inline bool xc_sr_set_bit(unsigned long bit, struct xc_sr_bitmap *bm) +{ + if (!xc_sr_bitmap_resize(bm, bit)) + return false; + + set_bit(bit, bm->p); + return true; +} + +static inline bool xc_sr_test_bit(unsigned long bit, struct xc_sr_bitmap *bm) +{ + if (bit > bm->bits) + return false; + return !!test_bit(bit, bm->p); +} + +static inline int xc_sr_test_and_clear_bit(unsigned long bit, struct xc_sr_bitmap *bm) +{ + return test_and_clear_bit(bit, bm->p); +} + +static inline int xc_sr_test_and_set_bit(unsigned long bit, struct xc_sr_bitmap *bm) +{ + return test_and_set_bit(bit, bm->p); +} + + struct xc_sr_context { xc_interface *xch; @@ -256,8 +305,7 @@ struct xc_sr_context domid_t xenstore_domid, console_domid; /* Bitmap of currently populated PFNs during restore. */ - unsigned long *populated_pfns; - xen_pfn_t max_populated_pfn; + struct xc_sr_bitmap populated_pfns; /* Sender has invoked verify mode on the stream. */ bool verify; @@ -332,6 +380,12 @@ struct xc_sr_context /* HVM context blob. */ void *context; size_t contextsz; + + /* Bitmap of currently allocated PFNs during restore. 
*/ + struct xc_sr_bitmap attempted_1g; + struct xc_sr_bitmap attempted_2m; + struct xc_sr_bitmap allocated_pfns; + unsigned long alloc_cnt; } restore; }; } x86_hvm; --- a/tools/libxc/xc_sr_restore.c +++ b/tools/libxc/xc_sr_restore.c @@ -71,11 +71,9 @@ static int read_headers(struct xc_sr_con /* * Is a pfn populated? */ -static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn) +static bool pfn_is_populated(struct xc_sr_context *ctx, xen_pfn_t pfn) { - if ( pfn > ctx->restore.max_populated_pfn ) - return false; - return test_bit(pfn, ctx->restore.populated_pfns); + return xc_sr_test_bit(pfn, &ctx->restore.populated_pfns); } /* @@ -87,42 +85,12 @@ static int pfn_set_populated(struct xc_s { xc_interface *xch = ctx->xch; - if ( pfn > ctx->restore.max_populated_pfn ) + if ( !xc_sr_set_bit(pfn, &ctx->restore.populated_pfns) ) { - xen_pfn_t new_max; - size_t old_sz, new_sz; - unsigned long *p; - - /* Round up to the nearest power of two larger than pfn, less 1. */ - new_max = pfn; - new_max |= new_max >> 1; - new_max |= new_max >> 2; - new_max |= new_max >> 4; - new_max |= new_max >> 8; - new_max |= new_max >> 16; -#ifdef __x86_64__ - new_max |= new_max >> 32; -#endif - - old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1); - new_sz = bitmap_size(new_max + 1); - p = realloc(ctx->restore.populated_pfns, new_sz); - if ( !p ) - { - ERROR("Failed to realloc populated bitmap"); - errno = ENOMEM; - return -1; - } - - memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz); - - ctx->restore.populated_pfns = p; - ctx->restore.max_populated_pfn = new_max; + ERROR("Failed to realloc populated bitmap"); + errno = ENOMEM; + return -1; } - - assert(!test_bit(pfn, ctx->restore.populated_pfns)); - set_bit(pfn, ctx->restore.populated_pfns); - return 0; } @@ -135,6 +103,7 @@ int populate_pfns(struct xc_sr_context * const xen_pfn_t *original_pfns, const uint32_t *types) { xc_interface *xch = ctx->xch; + xen_pfn_t min_pfn = original_pfns[0], max_pfn = original_pfns[0]; 
xen_pfn_t *mfns = malloc(count * sizeof(*mfns)), *pfns = malloc(count * sizeof(*pfns)); unsigned i, nr_pfns = 0; @@ -149,11 +118,18 @@ int populate_pfns(struct xc_sr_context * for ( i = 0; i < count; ++i ) { + if (original_pfns[i] < min_pfn) + min_pfn = original_pfns[i]; + if (original_pfns[i] > max_pfn) + max_pfn = original_pfns[i]; if ( (!types || (types && (types[i] != XEN_DOMCTL_PFINFO_XTAB && types[i] != XEN_DOMCTL_PFINFO_BROKEN))) && !pfn_is_populated(ctx, original_pfns[i]) ) { + rc = ctx->restore.ops.allocate_pfn(ctx, original_pfns[i]); + if ( rc ) + goto err; rc = pfn_set_populated(ctx, original_pfns[i]); if ( rc ) goto err; @@ -161,6 +137,21 @@ int populate_pfns(struct xc_sr_context * ++nr_pfns; } } + IPRINTF("checking range %lx %lx\n", min_pfn, max_pfn); + while (min_pfn < max_pfn) { + if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.allocated_pfns, min_pfn)) + { + PERROR("Failed to realloc allocated_pfns %" PRI_xen_pfn, min_pfn); + goto err; + } + if (!pfn_is_populated(ctx, min_pfn) && xc_sr_test_and_clear_bit(min_pfn, &ctx->x86_hvm.restore.allocated_pfns)) { + xen_pfn_t pfn = min_pfn; + rc = xc_domain_decrease_reservation_exact(xch, ctx->domid, 1, 0, &pfn); + IPRINTF("free %lx %lx %d\n", min_pfn, pfn, rc); + } + min_pfn++; + } + nr_pfns = 0; if ( nr_pfns ) { @@ -684,10 +675,8 @@ static int setup(struct xc_sr_context *c if ( rc ) goto err; - ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1; - ctx->restore.populated_pfns = bitmap_alloc( - ctx->restore.max_populated_pfn + 1); - if ( !ctx->restore.populated_pfns ) + rc = !xc_sr_bitmap_resize(&ctx->restore.populated_pfns, 32 * 1024 / 4); + if ( rc ) { ERROR("Unable to allocate memory for populated_pfns bitmap"); rc = -1; @@ -722,7 +711,10 @@ static void cleanup(struct xc_sr_context xc_hypercall_buffer_free_pages(xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size))); free(ctx->restore.buffered_records); - free(ctx->restore.populated_pfns); + xc_sr_bitmap_free(&ctx->restore.populated_pfns); + 
xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_1g); + xc_sr_bitmap_free(&ctx->x86_hvm.restore.attempted_2m); + xc_sr_bitmap_free(&ctx->x86_hvm.restore.allocated_pfns); if ( ctx->restore.ops.cleanup(ctx) ) PERROR("Failed to clean up"); } @@ -810,6 +802,17 @@ static int restore(struct xc_sr_context saved_errno = errno; saved_rc = rc; PERROR("Restore failed"); + { + unsigned long i; + bool a, p; + IPRINTF("alloc_cnt %lu\n", ctx->x86_hvm.restore.alloc_cnt); + for (i = 0; i < ctx->restore.p2m_size; i++) { + p = xc_sr_test_bit(i, &ctx->restore.populated_pfns); + a = xc_sr_test_bit(i, &ctx->x86_hvm.restore.allocated_pfns); + if (p != a) + IPRINTF("%lx a %x p %x\n", i, a, p); + } + } done: cleanup(ctx); @@ -888,6 +891,7 @@ int xc_domain_restore(xc_interface *xch, } ctx.restore.p2m_size = nr_pfns; + IPRINTF("p2m_size %lx\n", ctx.restore.p2m_size); if ( ctx.dominfo.hvm ) { --- a/tools/libxc/xc_sr_restore_x86_hvm.c +++ b/tools/libxc/xc_sr_restore_x86_hvm.c @@ -3,6 +3,10 @@ #include "xc_sr_common_x86.h" +#define SUPERPAGE_2MB_SHIFT 9 +#define SUPERPAGE_2MB_NR_PFNS (1UL << SUPERPAGE_2MB_SHIFT) +#define SUPERPAGE_1GB_SHIFT 18 +#define SUPERPAGE_1GB_NR_PFNS (1UL << SUPERPAGE_1GB_SHIFT) /* * Process an HVM_CONTEXT record from the stream. 
 */ @@ -130,6 +134,17 @@ static int x86_hvm_setup(struct xc_sr_co return -1; } + if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_1g, (ctx->restore.p2m_size >> SUPERPAGE_1GB_SHIFT) + 1) || + !xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_2m, (ctx->restore.p2m_size >> SUPERPAGE_2MB_SHIFT) + 1) || + !xc_sr_bitmap_resize(&ctx->x86_hvm.restore.allocated_pfns, ctx->restore.p2m_size + 1)) + { + ERROR("Unable to allocate memory for allocated_pfns bitmaps"); + return -1; + } + /* No superpage in 1st 2MB due to VGA hole */ + xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_1g); + xc_sr_set_bit(0, &ctx->x86_hvm.restore.attempted_2m); + return 0; } @@ -209,8 +224,110 @@ static int x86_hvm_cleanup(struct xc_sr_ return 0; } +static bool pfn_is_allocated(struct xc_sr_context *ctx, xen_pfn_t pfn) +{ + return xc_sr_test_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns); +} + +/* + * Set a pfn as allocated, expanding the tracking structures if needed. To + * avoid realloc()ing too excessively, the size is increased to the nearest power + * of two large enough to contain the required pfn. 
+ */ +static int pfn_set_allocated(struct xc_sr_context *ctx, xen_pfn_t pfn) +{ + xc_interface *xch = ctx->xch; + + if ( !xc_sr_set_bit(pfn, &ctx->x86_hvm.restore.allocated_pfns) ) + { + ERROR("Failed to realloc allocated_pfns bitmap"); + errno = ENOMEM; + return -1; + } + return 0; +} + +static int x86_hvm_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn) +{ + xc_interface *xch = ctx->xch; + bool success = false; + int rc = -1; + long done; + unsigned long i; + unsigned long stat_1g = 0, stat_2m = 0, stat_4k = 0; + unsigned long idx_1g, idx_2m; + unsigned long count; + xen_pfn_t base_pfn = 0, sp_extent; + + IPRINTF("pfn %lx\n", (long)pfn); + if (pfn_is_allocated(ctx, pfn)) + return 0; + + idx_1g = pfn >> SUPERPAGE_1GB_SHIFT; + idx_2m = pfn >> SUPERPAGE_2MB_SHIFT; + if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_1g, idx_1g)) + { + PERROR("Failed to realloc attempted_1g"); + return -1; + } + if (!xc_sr_bitmap_resize(&ctx->x86_hvm.restore.attempted_2m, idx_2m)) + { + PERROR("Failed to realloc attempted_2m"); + return -1; + } + IPRINTF("idx_1g %lu idx_2m %lu\n", idx_1g, idx_2m); + if (!xc_sr_test_and_set_bit(idx_1g, &ctx->x86_hvm.restore.attempted_1g)) { + count = 1UL << SUPERPAGE_1GB_SHIFT; + base_pfn = (pfn >> SUPERPAGE_1GB_SHIFT) << SUPERPAGE_1GB_SHIFT; + sp_extent = base_pfn; + done = xc_domain_populate_physmap(xch, ctx->domid, 1, SUPERPAGE_1GB_SHIFT, 0, &sp_extent); + IPRINTF("1G base_pfn %lx count %lu done %ld\n", (long)base_pfn, count, done); + if (done > 0) { + success = true; + ctx->x86_hvm.restore.alloc_cnt += count; + stat_1g = done; + for (i = 0; i < (count >> SUPERPAGE_2MB_SHIFT); i++) + xc_sr_set_bit((base_pfn >> SUPERPAGE_2MB_SHIFT) + i, &ctx->x86_hvm.restore.attempted_2m); + } + } + + if (!xc_sr_test_and_set_bit(idx_2m, &ctx->x86_hvm.restore.attempted_2m)) { + count = 1UL << SUPERPAGE_2MB_SHIFT; + base_pfn = (pfn >> SUPERPAGE_2MB_SHIFT) << SUPERPAGE_2MB_SHIFT; + sp_extent = base_pfn; + done = xc_domain_populate_physmap(xch, ctx->domid, 
1, SUPERPAGE_2MB_SHIFT, 0, &sp_extent); + IPRINTF("2M base_pfn %lx count %lu done %ld\n", (long)base_pfn, count, done); + if (done > 0) { + success = true; + ctx->x86_hvm.restore.alloc_cnt += count; + stat_2m = done; + } + } + if (success == false) { + count = 1; + sp_extent = base_pfn = pfn; + done = xc_domain_populate_physmap(xch, ctx->domid, count, 0, 0, &sp_extent); + if (done > 0) { + success = true; + ctx->x86_hvm.restore.alloc_cnt += count; + stat_4k = count; + } + } + IPRINTF("count %lu 1G %lu 2M %lu 4k %lu\n", count, stat_1g, stat_2m, stat_4k); + if (success == true) { + do { + count--; + rc = pfn_set_allocated(ctx, base_pfn + count); + if (rc) + break; + } while (count); + } + return rc; +} + struct xc_sr_restore_ops restore_ops_x86_hvm = { + .allocate_pfn = x86_hvm_allocate_pfn, .pfn_is_valid = x86_hvm_pfn_is_valid, .pfn_to_gfn = x86_hvm_pfn_to_gfn, .set_gfn = x86_hvm_set_gfn, --- a/tools/libxc/xc_sr_restore_x86_pv.c +++ b/tools/libxc/xc_sr_restore_x86_pv.c @@ -1141,8 +1141,15 @@ static int x86_pv_cleanup(struct xc_sr_c return 0; } +static int x86_pv_allocate_pfn(struct xc_sr_context *ctx, xen_pfn_t pfn) +{ + errno = ENOMEM; + return -1; +} + struct xc_sr_restore_ops restore_ops_x86_pv = { + .allocate_pfn = x86_pv_allocate_pfn, .pfn_is_valid = x86_pv_pfn_is_valid, .pfn_to_gfn = pfn_to_mfn, .set_page_type = x86_pv_set_page_type, --- a/tools/libxc/xc_sr_common.c +++ b/tools/libxc/xc_sr_common.c @@ -153,6 +153,42 @@ static void __attribute__((unused)) buil XC_BUILD_BUG_ON(sizeof(struct xc_sr_rec_hvm_params) != 8); } +bool _xc_sr_bitmap_resize(struct xc_sr_bitmap *bm, unsigned long bits) +{ + if (bits > bm->bits) + { + size_t new_max; + size_t old_sz, new_sz; + void *p; + + /* Round up to the nearest power of two larger than bit, less 1. 
*/ + new_max = bits; + new_max |= new_max >> 1; + new_max |= new_max >> 2; + new_max |= new_max >> 4; + new_max |= new_max >> 8; + new_max |= new_max >> 16; +#ifdef __x86_64__ + new_max |= new_max >> 32; +#endif + + old_sz = bitmap_size(bm->bits + 1); + new_sz = bitmap_size(new_max + 1); + p = realloc(bm->p, new_sz); + if (!p) + return false; + + if (bm->p) + memset(p + old_sz, 0, new_sz - old_sz); + else + memset(p, 0, new_sz); + + bm->p = p; + bm->bits = new_max; + } + return true; +} + /* * Local variables: * mode: C Attachment:
signature.asc _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |