[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Enable lazy (on-demand) allocation of memory to a guest being restored
# HG changeset patch # User Steven Hand <steven@xxxxxxxxxxxxx> # Date 1168941770 0 # Node ID 895d873a00b47cb7b0edf3d0b6a42f47a3f4854c # Parent 887168cf753254e70f38974367091f687a480bd5 Enable lazy (on-demand) allocation of memory to a guest being restored; this means that ballooned down domains only require as much memory as is currently being used (rather than their max) when being restored from save, or when being migrated. Signed-off-by: Steven Hand <steven@xxxxxxxxxxxxx> --- tools/libxc/xc_linux_restore.c | 207 +++++++++++++++++++++----------- tools/python/xen/xend/XendCheckpoint.py | 8 - 2 files changed, 145 insertions(+), 70 deletions(-) diff -r 887168cf7532 -r 895d873a00b4 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Mon Jan 15 18:09:16 2007 +0000 +++ b/tools/libxc/xc_linux_restore.c Tue Jan 16 10:02:50 2007 +0000 @@ -12,7 +12,7 @@ #include "xg_private.h" #include "xg_save_restore.h" -/* max mfn of the whole machine */ +/* max mfn of the current host machine */ static unsigned long max_mfn; /* virtual starting address of the hypervisor */ @@ -29,6 +29,9 @@ static xen_pfn_t *live_p2m = NULL; /* A table mapping each PFN to its new MFN. */ static xen_pfn_t *p2m = NULL; + +/* A table of P2M mappings in the current region */ +static xen_pfn_t *p2m_batch = NULL; static ssize_t @@ -57,46 +60,78 @@ read_exact(int fd, void *buf, size_t cou ** This function inverts that operation, replacing the pfn values with ** the (now known) appropriate mfn values. */ -static int uncanonicalize_pagetable(unsigned long type, void *page) +static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, + unsigned long type, void *page) { int i, pte_last; unsigned long pfn; uint64_t pte; + int nr_mfns = 0; pte_last = PAGE_SIZE / ((pt_levels == 2)? 
4 : 8); - /* Now iterate through the page table, uncanonicalizing each PTE */ + /* First pass: work out how many (if any) MFNs we need to alloc */ for(i = 0; i < pte_last; i++) { - + if(pt_levels == 2) pte = ((uint32_t *)page)[i]; else pte = ((uint64_t *)page)[i]; - - if(pte & _PAGE_PRESENT) { - - pfn = (pte >> PAGE_SHIFT) & 0xffffffff; - - if(pfn >= max_pfn) { - /* This "page table page" is probably not one; bail. */ - ERROR("Frame number in type %lu page table is out of range: " - "i=%d pfn=0x%lx max_pfn=%lu", - type >> 28, i, pfn, max_pfn); - return 0; - } - - - pte &= 0xffffff0000000fffULL; - pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT; - - if(pt_levels == 2) - ((uint32_t *)page)[i] = (uint32_t)pte; - else - ((uint64_t *)page)[i] = (uint64_t)pte; - - - - } + + /* XXX SMH: below needs fixing for PROT_NONE etc */ + if(!(pte & _PAGE_PRESENT)) + continue; + + pfn = (pte >> PAGE_SHIFT) & 0xffffffff; + + if(pfn >= max_pfn) { + /* This "page table page" is probably not one; bail. */ + ERROR("Frame number in type %lu page table is out of range: " + "i=%d pfn=0x%lx max_pfn=%lu", + type >> 28, i, pfn, max_pfn); + return 0; + } + + if(p2m[pfn] == INVALID_P2M_ENTRY) { + /* Have a 'valid' PFN without a matching MFN - need to alloc */ + p2m_batch[nr_mfns++] = pfn; + } + } + + + /* Allocate the requisite number of mfns */ + if (nr_mfns && xc_domain_memory_populate_physmap( + xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { + ERROR("Failed to allocate memory for batch.!\n"); + errno = ENOMEM; + return 0; + } + + /* Second pass: uncanonicalize each present PTE */ + nr_mfns = 0; + for(i = 0; i < pte_last; i++) { + + if(pt_levels == 2) + pte = ((uint32_t *)page)[i]; + else + pte = ((uint64_t *)page)[i]; + + /* XXX SMH: below needs fixing for PROT_NONE etc */ + if(!(pte & _PAGE_PRESENT)) + continue; + + pfn = (pte >> PAGE_SHIFT) & 0xffffffff; + + if(p2m[pfn] == INVALID_P2M_ENTRY) + p2m[pfn] = p2m_batch[nr_mfns++]; + + pte &= 0xffffff0000000fffULL; + pte |= (uint64_t)p2m[pfn] << 
PAGE_SHIFT; + + if(pt_levels == 2) + ((uint32_t *)page)[i] = (uint32_t)pte; + else + ((uint64_t *)page)[i] = (uint64_t)pte; } return 1; @@ -140,6 +175,7 @@ int xc_linux_restore(int xc_handle, int /* A temporary mapping of the guest's start_info page. */ start_info_t *start_info; + /* Our mapping of the current region (batch) */ char *region_base; xc_mmu_t *mmu = NULL; @@ -244,8 +280,10 @@ int xc_linux_restore(int xc_handle, int p2m = calloc(max_pfn, sizeof(xen_pfn_t)); pfn_type = calloc(max_pfn, sizeof(unsigned long)); region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); - - if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) { + p2m_batch = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); + + if ((p2m == NULL) || (pfn_type == NULL) || + (region_mfn == NULL) || (p2m_batch == NULL)) { ERROR("memory alloc failed"); errno = ENOMEM; goto out; @@ -253,6 +291,11 @@ int xc_linux_restore(int xc_handle, int if (lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { ERROR("Could not lock region_mfn"); + goto out; + } + + if (lock_pages(p2m_batch, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { + ERROR("Could not lock p2m_batch"); goto out; } @@ -270,17 +313,9 @@ int xc_linux_restore(int xc_handle, int goto out; } + /* Mark all PFNs as invalid; we allocate on demand */ for ( pfn = 0; pfn < max_pfn; pfn++ ) - p2m[pfn] = pfn; - - if (xc_domain_memory_populate_physmap(xc_handle, dom, max_pfn, - 0, 0, p2m) != 0) { - ERROR("Failed to increase reservation by %lx KB", PFN_TO_KB(max_pfn)); - errno = ENOMEM; - goto out; - } - - DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn)); + p2m[pfn] = INVALID_P2M_ENTRY; if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) { ERROR("Could not initialise for MMU updates"); @@ -298,7 +333,7 @@ int xc_linux_restore(int xc_handle, int n = 0; while (1) { - int j; + int j, nr_mfns = 0; this_pc = (n * 100) / max_pfn; if ( (this_pc - prev_pc) >= 5 ) @@ -333,20 +368,57 @@ int xc_linux_restore(int xc_handle, int goto out; } + /* 
First pass for this batch: work out how much memory to alloc */ + nr_mfns = 0; for ( i = 0; i < j; i++ ) { unsigned long pfn, pagetype; pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; + if ( (pagetype != XEN_DOMCTL_PFINFO_XTAB) && + (p2m[pfn] == INVALID_P2M_ENTRY) ) + { + /* Have a live PFN which hasn't had an MFN allocated */ + p2m_batch[nr_mfns++] = pfn; + } + } + + + /* Now allocate a bunch of mfns for this batch */ + if (nr_mfns && xc_domain_memory_populate_physmap( + xc_handle, dom, nr_mfns, 0, 0, p2m_batch) != 0) { + ERROR("Failed to allocate memory for batch.!\n"); + errno = ENOMEM; + goto out; + } + + /* Second pass for this batch: update p2m[] and region_mfn[] */ + nr_mfns = 0; + for ( i = 0; i < j; i++ ) + { + unsigned long pfn, pagetype; + pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; + pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; + if ( pagetype == XEN_DOMCTL_PFINFO_XTAB) - region_mfn[i] = 0; /* we know map will fail, but don't care */ - else - region_mfn[i] = p2m[pfn]; - } - + region_mfn[i] = ~0UL; /* map will fail but we don't care */ + else + { + if (p2m[pfn] == INVALID_P2M_ENTRY) { + /* We just allocated a new mfn above; update p2m */ + p2m[pfn] = p2m_batch[nr_mfns++]; + } + + /* setup region_mfn[] for batch map */ + region_mfn[i] = p2m[pfn]; + } + } + + /* Map relevant mfns */ region_base = xc_map_foreign_batch( xc_handle, dom, PROT_WRITE, region_mfn, j); + if ( region_base == NULL ) { ERROR("map batch failed"); @@ -401,7 +473,8 @@ int xc_linux_restore(int xc_handle, int pae_extended_cr3 || (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) { - if (!uncanonicalize_pagetable(pagetype, page)) { + if (!uncanonicalize_pagetable(xc_handle, dom, + pagetype, page)) { /* ** Failing to uncanonicalize a page table can be ok ** under live migration since the pages type may have @@ -411,10 +484,8 @@ int xc_linux_restore(int xc_handle, int pagetype >> 28, pfn, mfn); nraces++; 
continue; - } - - } - + } + } } else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB ) { @@ -486,7 +557,7 @@ int xc_linux_restore(int xc_handle, int */ int j, k; - + /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ for ( i = 0; i < max_pfn; i++ ) { @@ -555,7 +626,8 @@ int xc_linux_restore(int xc_handle, int } for(k = 0; k < j; k++) { - if(!uncanonicalize_pagetable(XEN_DOMCTL_PFINFO_L1TAB, + if(!uncanonicalize_pagetable(xc_handle, dom, + XEN_DOMCTL_PFINFO_L1TAB, region_base + k*PAGE_SIZE)) { ERROR("failed uncanonicalize pt!"); goto out; @@ -631,7 +703,7 @@ int xc_linux_restore(int xc_handle, int { unsigned int count; unsigned long *pfntab; - int rc; + int nr_frees, rc; if (!read_exact(io_fd, &count, sizeof(count))) { ERROR("Error when reading pfn count"); @@ -648,29 +720,30 @@ int xc_linux_restore(int xc_handle, int goto out; } + nr_frees = 0; for (i = 0; i < count; i++) { unsigned long pfn = pfntab[i]; - if(pfn > max_pfn) - /* shouldn't happen - continue optimistically */ - continue; - - pfntab[i] = p2m[pfn]; - p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map - } - - if (count > 0) { + if(p2m[pfn] != INVALID_P2M_ENTRY) { + /* pfn is not in physmap now, but was at some point during + the save/migration process - need to free it */ + pfntab[nr_frees++] = p2m[pfn]; + p2m[pfn] = INVALID_P2M_ENTRY; // not in pseudo-physical map + } + } + + if (nr_frees > 0) { struct xen_memory_reservation reservation = { - .nr_extents = count, + .nr_extents = nr_frees, .extent_order = 0, .domid = dom }; set_xen_guest_handle(reservation.extent_start, pfntab); if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, - &reservation)) != count) { + &reservation)) != nr_frees) { ERROR("Could not decrease reservation : %d", rc); goto out; } else @@ -791,6 +864,6 @@ int xc_linux_restore(int xc_handle, int free(pfn_type); DPRINTF("Restore exit with rc=%d\n", rc); - + return rc; } diff -r 887168cf7532 -r 895d873a00b4 tools/python/xen/xend/XendCheckpoint.py --- 
a/tools/python/xen/xend/XendCheckpoint.py Mon Jan 15 18:09:16 2007 +0000 +++ b/tools/python/xen/xend/XendCheckpoint.py Tue Jan 16 10:02:50 2007 +0000 @@ -147,18 +147,20 @@ def restore(xd, fd, dominfo = None, paus assert store_port assert console_port + nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 + try: l = read_exact(fd, sizeof_unsigned_long, "not a valid guest state file: pfn count read") - nr_pfns = unpack("L", l)[0] # native sizeof long - if nr_pfns > 16*1024*1024: # XXX + max_pfn = unpack("L", l)[0] # native sizeof long + if max_pfn > 16*1024*1024: # XXX raise XendError( "not a valid guest state file: pfn count out of range") balloon.free(xc.pages_to_kib(nr_pfns)) cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), - fd, dominfo.getDomid(), nr_pfns, + fd, dominfo.getDomid(), max_pfn, store_port, console_port]) log.debug("[xc_restore]: %s", string.join(cmd)) _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |