[Xen-devel] [PATCH RFC v1 04/13] libxc: allow arch_setup_meminit to populate HVM domain memory
Introduce a new arch_setup_meminit_hvm function that will be used to
populate HVM domain memory. Rename arch_setup_meminit to
arch_setup_meminit_pv and introduce a stub arch_setup_meminit that
calls the appropriate meminit function depending on the container type.
Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Cc: Elena Ufimtseva <elena.ufimtseva@xxxxxxxxxx>
---
I think that both arch_setup_meminit_hvm and arch_setup_meminit_pv could
be unified into a single meminit function. I have not looked into that,
however, and have simply created arch_setup_meminit_hvm based on the code
in xc_hvm_populate_memory.
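
As a side note for reviewers, the superpage tail handling in
arch_setup_meminit_hvm relies on the identity that -pfn & (align - 1)
gives the number of pages from pfn up to the next align-sized boundary.
Below is a minimal standalone sketch of that arithmetic; the helper name
and the test harness are mine, only the constants come from the patch:

    #include <stdio.h>

    #define SUPERPAGE_2MB_SHIFT   9
    #define SUPERPAGE_2MB_NR_PFNS (1UL << SUPERPAGE_2MB_SHIFT)

    /* Number of 4KB pages from cur_pfn up to the next 2MB boundary. */
    static unsigned long pages_to_boundary(unsigned long cur_pfn)
    {
        return -cur_pfn & (SUPERPAGE_2MB_NR_PFNS - 1);
    }

    int main(void)
    {
        /* 0x1f3 is 0xd pages short of the 0x200-aligned boundary. */
        printf("%#lx\n", pages_to_boundary(0x1f3));
        /* An already aligned pfn needs no tail clipping. */
        printf("%#lx\n", pages_to_boundary(0x400));
        return 0;
    }

Clipping this way first fills a misaligned range up to the boundary with
smaller pages, so the remainder can then be allocated as whole superpages.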
---
tools/libxc/include/xc_dom.h | 8 +
tools/libxc/xc_dom_x86.c | 365 +++++++++++++++++++++++++++++++++++++++++--
tools/libxl/libxl_dom.c | 1 +
3 files changed, 362 insertions(+), 12 deletions(-)
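
Also for reviewers: check_mmio_hole below is the usual half-open interval
overlap test; [start, start + memsize) and [mmio_start, mmio_start +
mmio_size) intersect unless one range ends before the other begins. A
small sketch of its behaviour, with made-up addresses that are not taken
from any real guest layout:

    #include <assert.h>
    #include <stdint.h>

    /* Mirrors check_mmio_hole() from the patch: returns 1 if
     * [start, start+memsize) overlaps [mmio_start, mmio_start+mmio_size). */
    static int check_mmio_hole(uint64_t start, uint64_t memsize,
                               uint64_t mmio_start, uint64_t mmio_size)
    {
        if ( start + memsize <= mmio_start || start >= mmio_start + mmio_size )
            return 0;
        else
            return 1;
    }

    int main(void)
    {
        /* A 1GB range at 3GB overlaps an MMIO hole at 0xF0000000. */
        assert(check_mmio_hole(0xC0000000ULL, 0x40000000ULL,
                               0xF0000000ULL, 0x10000000ULL) == 1);
        /* The same hole does not intersect the first GB of memory. */
        assert(check_mmio_hole(0, 0x40000000ULL,
                               0xF0000000ULL, 0x10000000ULL) == 0);
        return 0;
    }
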
diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
index f7b5f0f..051a7de 100644
--- a/tools/libxc/include/xc_dom.h
+++ b/tools/libxc/include/xc_dom.h
@@ -186,6 +186,14 @@ struct xc_dom_image {
         XC_DOM_PV_CONTAINER,
         XC_DOM_HVM_CONTAINER,
     } container_type;
+
+    /* HVM specific fields. */
+    xen_pfn_t target_pages;
+    xen_pfn_t mmio_start;
+    xen_pfn_t mmio_size;
+    xen_pfn_t lowmem_end;
+    xen_pfn_t highmem_end;
+    int vga_hole;
 };
 
 /* --- pluggable kernel loader ------------------------------------- */
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index b89f5c2..8a1ef24 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -40,10 +40,15 @@
 
 /* ------------------------------------------------------------------------ */
 
-#define SUPERPAGE_PFN_SHIFT  9
-#define SUPERPAGE_NR_PFNS    (1UL << SUPERPAGE_PFN_SHIFT)
 #define SUPERPAGE_BATCH_SIZE 512
+#define SUPERPAGE_2MB_SHIFT   9
+#define SUPERPAGE_2MB_NR_PFNS (1UL << SUPERPAGE_2MB_SHIFT)
+#define SUPERPAGE_1GB_SHIFT   18
+#define SUPERPAGE_1GB_NR_PFNS (1UL << SUPERPAGE_1GB_SHIFT)
+
+#define VGA_HOLE_SIZE (0x20)
+
 #define bits_to_mask(bits) (((xen_vaddr_t)1 << (bits))-1)
 #define round_down(addr, mask) ((addr) & ~(mask))
 #define round_up(addr, mask) ((addr) | (mask))
@@ -758,7 +763,7 @@ static int x86_shadow(xc_interface *xch, domid_t domid)
     return rc;
 }
 
-int arch_setup_meminit(struct xc_dom_image *dom)
+static int arch_setup_meminit_pv(struct xc_dom_image *dom)
 {
     int rc;
     xen_pfn_t pfn, allocsz, mfn, total, pfn_base;
@@ -782,7 +787,7 @@ int arch_setup_meminit(struct xc_dom_image *dom)
 
     if ( dom->superpages )
     {
-        int count = dom->total_pages >> SUPERPAGE_PFN_SHIFT;
+        int count = dom->total_pages >> SUPERPAGE_2MB_SHIFT;
         xen_pfn_t extents[count];
 
         dom->p2m_size = dom->total_pages;
@@ -793,9 +798,9 @@ int arch_setup_meminit(struct xc_dom_image *dom)
 
         DOMPRINTF("Populating memory with %d superpages", count);
         for ( pfn = 0; pfn < count; pfn++ )
-            extents[pfn] = pfn << SUPERPAGE_PFN_SHIFT;
+            extents[pfn] = pfn << SUPERPAGE_2MB_SHIFT;
         rc = xc_domain_populate_physmap_exact(dom->xch, dom->guest_domid,
-                                              count, SUPERPAGE_PFN_SHIFT, 0,
+                                              count, SUPERPAGE_2MB_SHIFT, 0,
                                               extents);
         if ( rc )
             return rc;
@@ -805,7 +810,7 @@ int arch_setup_meminit(struct xc_dom_image *dom)
         for ( i = 0; i < count; i++ )
         {
             mfn = extents[i];
-            for ( j = 0; j < SUPERPAGE_NR_PFNS; j++, pfn++ )
+            for ( j = 0; j < SUPERPAGE_2MB_NR_PFNS; j++, pfn++ )
                 dom->p2m_host[pfn] = mfn + j;
         }
     }
@@ -881,7 +886,7 @@ int arch_setup_meminit(struct xc_dom_image *dom)
             unsigned int memflags;
             uint64_t pages;
             unsigned int pnode = vnode_to_pnode[vmemranges[i].nid];
-            int nr_spages = dom->total_pages >> SUPERPAGE_PFN_SHIFT;
+            int nr_spages = dom->total_pages >> SUPERPAGE_2MB_SHIFT;
             xen_pfn_t extents[SUPERPAGE_BATCH_SIZE];
             xen_pfn_t pfn_base_idx;
@@ -902,11 +907,11 @@ int arch_setup_meminit(struct xc_dom_image *dom)
                 nr_spages -= count;
 
                 for ( pfn = pfn_base_idx, j = 0;
-                      pfn < pfn_base_idx + (count << SUPERPAGE_PFN_SHIFT);
-                      pfn += SUPERPAGE_NR_PFNS, j++ )
+                      pfn < pfn_base_idx + (count << SUPERPAGE_2MB_SHIFT);
+                      pfn += SUPERPAGE_2MB_NR_PFNS, j++ )
                     extents[j] = dom->p2m_host[pfn];
 
                 rc = xc_domain_populate_physmap(dom->xch, dom->guest_domid,
                                                 count,
-                                                SUPERPAGE_PFN_SHIFT, memflags,
+                                                SUPERPAGE_2MB_SHIFT, memflags,
                                                 extents);
                 if ( rc < 0 )
                     return rc;
@@ -916,7 +921,7 @@ int arch_setup_meminit(struct xc_dom_image *dom)
                 for ( j = 0; j < rc; j++ )
                 {
                     mfn = extents[j];
-                    for ( k = 0; k < SUPERPAGE_NR_PFNS; k++, pfn++ )
+                    for ( k = 0; k < SUPERPAGE_2MB_NR_PFNS; k++, pfn++ )
                         dom->p2m_host[pfn] = mfn + k;
                 }
                 pfn_base_idx = pfn;
@@ -957,6 +962,342 @@ int arch_setup_meminit(struct xc_dom_image *dom)
 
     return rc;
 }
+/*
+ * Check whether an MMIO hole overlaps the specified memory range.
+ * Returns 1 if it does, 0 otherwise.
+ */
+static int check_mmio_hole(uint64_t start, uint64_t memsize,
+                           uint64_t mmio_start, uint64_t mmio_size)
+{
+    if ( start + memsize <= mmio_start || start >= mmio_start + mmio_size )
+        return 0;
+    else
+        return 1;
+}
+
+static int arch_setup_meminit_hvm(struct xc_dom_image *dom)
+{
+    unsigned long i, vmemid, nr_pages = dom->total_pages;
+    unsigned long p2m_size;
+    unsigned long target_pages = dom->target_pages;
+    unsigned long cur_pages, cur_pfn;
+    int rc = 0;
+    xen_capabilities_info_t caps;
+    unsigned long stat_normal_pages = 0, stat_2mb_pages = 0,
+        stat_1gb_pages = 0;
+    unsigned int memflags = 0;
+    int claim_enabled = dom->claim_enabled;
+    uint64_t total_pages;
+    xen_vmemrange_t dummy_vmemrange[2];
+    unsigned int dummy_vnode_to_pnode[1];
+    xen_vmemrange_t *vmemranges;
+    unsigned int *vnode_to_pnode;
+    unsigned int nr_vmemranges, nr_vnodes;
+    xc_interface *xch = dom->xch;
+    uint32_t domid = dom->guest_domid;
+
+    if ( nr_pages > target_pages )
+        memflags |= XENMEMF_populate_on_demand;
+
+    if ( dom->nr_vmemranges == 0 )
+    {
+        /* Build dummy vnode information
+         *
+         * Guest physical address space layout:
+         * [0, hole_start) [hole_start, 4G) [4G, highmem_end)
+         *
+         * Of course if there is no high memory, the second vmemrange
+         * has no effect on the actual result.
+         */
+
+        dummy_vmemrange[0].start = 0;
+        dummy_vmemrange[0].end   = dom->lowmem_end;
+        dummy_vmemrange[0].flags = 0;
+        dummy_vmemrange[0].nid   = 0;
+        nr_vmemranges = 1;
+
+        if ( dom->highmem_end > (1ULL << 32) )
+        {
+            dummy_vmemrange[1].start = 1ULL << 32;
+            dummy_vmemrange[1].end   = dom->highmem_end;
+            dummy_vmemrange[1].flags = 0;
+            dummy_vmemrange[1].nid   = 0;
+
+            nr_vmemranges++;
+        }
+
+        dummy_vnode_to_pnode[0] = XC_NUMA_NO_NODE;
+        nr_vnodes = 1;
+        vmemranges = dummy_vmemrange;
+        vnode_to_pnode = dummy_vnode_to_pnode;
+    }
+    else
+    {
+        if ( nr_pages > target_pages )
+        {
+            DOMPRINTF("Cannot enable vNUMA and PoD at the same time");
+            goto error_out;
+        }
+
+        nr_vmemranges = dom->nr_vmemranges;
+        nr_vnodes = dom->nr_vnodes;
+        vmemranges = dom->vmemranges;
+        vnode_to_pnode = dom->vnode_to_pnode;
+    }
+
+    total_pages = 0;
+    p2m_size = 0;
+    for ( i = 0; i < nr_vmemranges; i++ )
+    {
+        total_pages += ((vmemranges[i].end - vmemranges[i].start)
+                        >> PAGE_SHIFT);
+        p2m_size = p2m_size > (vmemranges[i].end >> PAGE_SHIFT) ?
+            p2m_size : (vmemranges[i].end >> PAGE_SHIFT);
+    }
+
+    if ( total_pages != nr_pages )
+    {
+        DOMPRINTF("vNUMA memory pages mismatch (0x%"PRIx64" != 0x%"PRIx64")",
+                  total_pages, nr_pages);
+        goto error_out;
+    }
+
+    if ( xc_version(xch, XENVER_capabilities, &caps) != 0 )
+    {
+        DOMPRINTF("Could not get Xen capabilities");
+        goto error_out;
+    }
+
+    dom->p2m_size = p2m_size;
+    dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) *
+                                  dom->p2m_size);
+    if ( dom->p2m_host == NULL )
+    {
+        DOMPRINTF("Could not allocate p2m");
+        goto error_out;
+    }
+
+    for ( i = 0; i < p2m_size; i++ )
+        dom->p2m_host[i] = ((xen_pfn_t)-1);
+    for ( vmemid = 0; vmemid < nr_vmemranges; vmemid++ )
+    {
+        uint64_t pfn;
+
+        for ( pfn = vmemranges[vmemid].start >> PAGE_SHIFT;
+              pfn < vmemranges[vmemid].end >> PAGE_SHIFT;
+              pfn++ )
+            dom->p2m_host[pfn] = pfn;
+    }
+
+    /*
+     * Try to claim pages for early warning of insufficient memory available.
+     * This should go before xc_domain_set_pod_target, because that function
+     * actually allocates memory for the guest. Claiming after memory has
+     * been allocated is pointless.
+     */
+    if ( claim_enabled )
+    {
+        rc = xc_domain_claim_pages(xch, domid, target_pages -
+                                   (dom->vga_hole ? VGA_HOLE_SIZE : 0));
+        if ( rc != 0 )
+        {
+            DOMPRINTF("Could not allocate memory for HVM guest as we cannot "
+                      "claim memory!");
+            goto error_out;
+        }
+    }
+
+    if ( memflags & XENMEMF_populate_on_demand )
+    {
+        /*
+         * Subtract VGA_HOLE_SIZE from target_pages for the VGA
+         * "hole". Xen will adjust the PoD cache size so that domain
+         * tot_pages will be target_pages - VGA_HOLE_SIZE after
+         * this call.
+         */
+        rc = xc_domain_set_pod_target(xch, domid,
+                                      target_pages -
+                                      (dom->vga_hole ? VGA_HOLE_SIZE : 0),
+                                      NULL, NULL, NULL);
+        if ( rc != 0 )
+        {
+            DOMPRINTF("Could not set PoD target for HVM guest.");
+            goto error_out;
+        }
+    }
+
+    /*
+     * Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
+     *
+     * We attempt to allocate 1GB pages if possible, falling back to 2MB
+     * pages if that fails, and finally to 4KB pages if both fail.
+     *
+     * Under 2MB mode, we allocate pages in batches of no more than 8MB to
+     * ensure that we can be preempted and hence dom0 remains responsive.
+     */
+    if ( dom->vga_hole )
+        rc = xc_domain_populate_physmap_exact(
+            xch, domid, 0xa0, 0, memflags, &dom->p2m_host[0x00]);
+
+    stat_normal_pages = 0;
+    for ( vmemid = 0; vmemid < nr_vmemranges; vmemid++ )
+    {
+        unsigned int new_memflags = memflags;
+        uint64_t end_pages;
+        unsigned int vnode = vmemranges[vmemid].nid;
+        unsigned int pnode = vnode_to_pnode[vnode];
+
+        if ( pnode != XC_NUMA_NO_NODE )
+            new_memflags |= XENMEMF_exact_node(pnode);
+
+        end_pages = vmemranges[vmemid].end >> PAGE_SHIFT;
+        /*
+         * Consider the VGA hole as belonging to the vmemrange that covers
+         * 0xA0000-0xC0000. Note that 0x00000-0xA0000 is populated just
+         * before this loop.
+         */
+        if ( vmemranges[vmemid].start == 0 && dom->vga_hole )
+        {
+            cur_pages = 0xc0;
+            stat_normal_pages += 0xc0;
+        }
+        else
+            cur_pages = vmemranges[vmemid].start >> PAGE_SHIFT;
+
+        while ( (rc == 0) && (end_pages > cur_pages) )
+        {
+            /* Clip count to maximum 1GB extent. */
+            unsigned long count = end_pages - cur_pages;
+            unsigned long max_pages = SUPERPAGE_1GB_NR_PFNS;
+
+            if ( count > max_pages )
+                count = max_pages;
+
+            cur_pfn = dom->p2m_host[cur_pages];
+
+            /* Take care of the corner cases of superpage tails. */
+            if ( ((cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
+                 (count > (-cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1))) )
+                count = -cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1);
+            else if ( ((count & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
+                      (count > SUPERPAGE_1GB_NR_PFNS) )
+                count &= ~(SUPERPAGE_1GB_NR_PFNS - 1);
+
+            /*
+             * Attempt to allocate 1GB superpages. Because in each pass
+             * we only allocate at most 1GB, we don't have to clip
+             * superpage boundaries.
+             */
+            if ( ((count | cur_pfn) & (SUPERPAGE_1GB_NR_PFNS - 1)) == 0 &&
+                 /* Check if an MMIO hole exists in the 1GB memory range. */
+                 !check_mmio_hole(cur_pfn << PAGE_SHIFT,
+                                  SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT,
+                                  dom->mmio_start, dom->mmio_size) )
+            {
+                long done;
+                unsigned long nr_extents = count >> SUPERPAGE_1GB_SHIFT;
+                xen_pfn_t sp_extents[nr_extents];
+
+                for ( i = 0; i < nr_extents; i++ )
+                    sp_extents[i] =
+                        dom->p2m_host[cur_pages+(i<<SUPERPAGE_1GB_SHIFT)];
+
+                done = xc_domain_populate_physmap(xch, domid, nr_extents,
+                                                  SUPERPAGE_1GB_SHIFT,
+                                                  memflags, sp_extents);
+
+                if ( done > 0 )
+                {
+                    stat_1gb_pages += done;
+                    done <<= SUPERPAGE_1GB_SHIFT;
+                    cur_pages += done;
+                    count -= done;
+                }
+            }
+
+            if ( count != 0 )
+            {
+                /* Clip count to maximum 8MB extent. */
+                max_pages = SUPERPAGE_2MB_NR_PFNS * 4;
+                if ( count > max_pages )
+                    count = max_pages;
+
+                /* Clip partial superpage extents to superpage boundaries. */
+                if ( ((cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
+                     (count > (-cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1))) )
+                    count = -cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1);
+                else if ( ((count & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
+                          (count > SUPERPAGE_2MB_NR_PFNS) )
+                    count &= ~(SUPERPAGE_2MB_NR_PFNS - 1); /* clip non-s.p. tail */
+
+                /* Attempt to allocate superpage extents. */
+                if ( ((count | cur_pfn) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
+                {
+                    long done;
+                    unsigned long nr_extents = count >> SUPERPAGE_2MB_SHIFT;
+                    xen_pfn_t sp_extents[nr_extents];
+
+                    for ( i = 0; i < nr_extents; i++ )
+                        sp_extents[i] =
+                            dom->p2m_host[cur_pages+(i<<SUPERPAGE_2MB_SHIFT)];
+
+                    done = xc_domain_populate_physmap(xch, domid, nr_extents,
+                                                      SUPERPAGE_2MB_SHIFT,
+                                                      memflags, sp_extents);
+
+                    if ( done > 0 )
+                    {
+                        stat_2mb_pages += done;
+                        done <<= SUPERPAGE_2MB_SHIFT;
+                        cur_pages += done;
+                        count -= done;
+                    }
+                }
+            }
+
+            /* Fall back to 4KB extents. */
+            if ( count != 0 )
+            {
+                rc = xc_domain_populate_physmap_exact(
+                    xch, domid, count, 0, new_memflags,
+                    &dom->p2m_host[cur_pages]);
+                cur_pages += count;
+                stat_normal_pages += count;
+            }
+        }
+
+        if ( rc != 0 )
+            break;
+    }
+
+    if ( rc != 0 )
+    {
+        DOMPRINTF("Could not allocate memory for HVM guest.");
+        goto error_out;
+    }
+
+    DPRINTF("PHYSICAL MEMORY ALLOCATION:\n");
+    DPRINTF("  4KB PAGES: 0x%016lx\n", stat_normal_pages);
+    DPRINTF("  2MB PAGES: 0x%016lx\n", stat_2mb_pages);
+    DPRINTF("  1GB PAGES: 0x%016lx\n", stat_1gb_pages);
+
+    rc = 0;
+    goto out;
+
+ error_out:
+    rc = -1;
+
+ out:
+    /* Ensure no unclaimed pages are left unused. */
+    xc_domain_claim_pages(xch, domid, 0 /* cancels the claim */);
+
+    return rc;
+}
+
+int arch_setup_meminit(struct xc_dom_image *dom)
+{
+    return (dom->container_type == XC_DOM_PV_CONTAINER) ?
+        arch_setup_meminit_pv(dom) : arch_setup_meminit_hvm(dom);
+}
+
 int arch_setup_bootearly(struct xc_dom_image *dom)
 {
     DOMPRINTF("%s: doing nothing", __FUNCTION__);
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 8907bd6..6273052 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -666,6 +666,7 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid,
     dom->xenstore_evtchn = state->store_port;
     dom->xenstore_domid = state->store_domid;
     dom->claim_enabled = libxl_defbool_val(info->claim_mode);
+    dom->vga_hole = 0;
 
     if (info->num_vnuma_nodes != 0) {
         unsigned int i;
--
1.9.5 (Apple Git-50.3)