[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 5/5] libxc: create p2m list outside of kernel mapping if supported



If the kernel of a new pv-domU indicates, by specifying INIT_P2M, that
it supports a p2m list outside the initial kernel mapping, let the
domain builder allocate the memory for the p2m list from physical
guest memory only and map it at the address the kernel expects.

This will enable loading pv-domUs larger than 512 GB.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
 tools/libxc/include/xc_dom.h |   1 +
 tools/libxc/xc_dom_core.c    |  17 ++++++-
 tools/libxc/xc_dom_x86.c     | 109 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
index 9117269..5731098 100644
--- a/tools/libxc/include/xc_dom.h
+++ b/tools/libxc/include/xc_dom.h
@@ -210,6 +210,7 @@ struct xc_dom_arch {
     char *native_protocol;
     int page_shift;
     int sizeof_pfn;
+    int p2m_base_supported;
 
     struct xc_dom_arch *next;
 };
diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
index bd970c5..36a0d63 100644
--- a/tools/libxc/xc_dom_core.c
+++ b/tools/libxc/xc_dom_core.c
@@ -734,6 +734,7 @@ struct xc_dom_image *xc_dom_allocate(xc_interface *xch,
     dom->parms.virt_hypercall = UNSET_ADDR;
     dom->parms.virt_hv_start_low = UNSET_ADDR;
     dom->parms.elf_paddr_offset = UNSET_ADDR;
+    dom->parms.p2m_base = UNSET_ADDR;
 
     dom->alloc_malloc += sizeof(*dom);
     return dom;
@@ -1047,7 +1048,11 @@ int xc_dom_build_image(struct xc_dom_image *dom)
     }
 
     /* allocate other pages */
-    if ( dom->arch_hooks->alloc_p2m_list &&
+    if ( !dom->arch_hooks->p2m_base_supported ||
+         dom->parms.p2m_base >= dom->parms.virt_base ||
+         (dom->parms.p2m_base & (XC_DOM_PAGE_SIZE(dom) - 1)) )
+        dom->parms.p2m_base = UNSET_ADDR;
+    if ( dom->arch_hooks->alloc_p2m_list && dom->parms.p2m_base == UNSET_ADDR &&
          dom->arch_hooks->alloc_p2m_list(dom) != 0 )
         goto err;
     if ( dom->arch_hooks->alloc_magic_pages(dom) != 0 )
@@ -1084,6 +1089,16 @@ int xc_dom_build_image(struct xc_dom_image *dom)
         dom->ramdisk_seg.vend += dom->ramdisk_seg.vstart;
     }
 
+    /* Allocate p2m list if outside of initial kernel mapping. */
+    if ( dom->arch_hooks->alloc_p2m_list && dom->parms.p2m_base != UNSET_ADDR )
+    {
+        if ( dom->arch_hooks->alloc_p2m_list(dom) != 0 )
+            goto err;
+        dom->p2m_seg.vend = dom->p2m_seg.vend - dom->p2m_seg.vstart;
+        dom->p2m_seg.vstart = dom->parms.p2m_base;
+        dom->p2m_seg.vend += dom->p2m_seg.vstart;
+    }
+
     return 0;
 
  err:
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index 972f081..5c0d28e 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -46,6 +46,8 @@
 #define bits_to_mask(bits)       (((xen_vaddr_t)1 << (bits))-1)
 #define round_down(addr, mask)   ((addr) & ~(mask))
 #define round_up(addr, mask)     ((addr) | (mask))
+#define round_pg(addr)    (((addr) + PAGE_SIZE_X86 - 1) & ~(PAGE_SIZE_X86 - 1))
+#define round_pfn(addr)   (((addr) + PAGE_SIZE_X86 - 1) / PAGE_SIZE_X86)
 
 /* get guest IO ABI protocol */
 const char *xc_domain_get_native_protocol(xc_interface *xch,
@@ -424,6 +426,81 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom)
             }
         }
     }
+
+    if ( dom->parms.p2m_base == UNSET_ADDR )
+        return 0;
+
+    /*
+     * Build the page tables for mapping the p2m list at an address
+     * specified by the to be loaded kernel.
+     * l1pfn holds the pfn of the next page table to allocate.
+     * At each level we might already have an entry filled when setting
+     * up the initial kernel mapping. This can happen for the last entry
+     * of each level only!
+     */
+    l3tab = NULL;
+    l2tab = NULL;
+    l1tab = NULL;
+    l1pfn = round_pfn(dom->p2m_size * dom->arch_hooks->sizeof_pfn) +
+            dom->p2m_seg.pfn;
+
+    for ( addr = dom->parms.p2m_base;
+          addr < dom->parms.p2m_base +
+                 dom->p2m_size * dom->arch_hooks->sizeof_pfn;
+          addr += PAGE_SIZE_X86 )
+    {
+        if ( l3tab == NULL )
+        {
+            l4off = l4_table_offset_x86_64(addr);
+            l3pfn = l4tab[l4off] ? l4pfn + dom->pg_l4 : l1pfn++;
+            l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
+            if ( l3tab == NULL )
+                goto pfn_error;
+            l4tab[l4off] =
+                pfn_to_paddr(xc_dom_p2m_guest(dom, l3pfn)) | L4_PROT;
+        }
+
+        if ( l2tab == NULL )
+        {
+            l3off = l3_table_offset_x86_64(addr);
+            l2pfn = l3tab[l3off] ? l3pfn + dom->pg_l3 : l1pfn++;
+            l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
+            if ( l2tab == NULL )
+                goto pfn_error;
+            l3tab[l3off] =
+                pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+        }
+
+        if ( l1tab == NULL )
+        {
+            l2off = l2_table_offset_x86_64(addr);
+            l1pfn = l2tab[l2off] ? l2pfn + dom->pg_l2 : l1pfn;
+            l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
+            if ( l1tab == NULL )
+                goto pfn_error;
+            l2tab[l2off] =
+                pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
+            l1pfn++;
+        }
+
+        l1off = l1_table_offset_x86_64(addr);
+        pgpfn = ((addr - dom->parms.p2m_base) >> PAGE_SHIFT_X86) +
+                dom->p2m_seg.pfn;
+        l1tab[l1off] =
+            pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
+
+        if ( l1off == (L1_PAGETABLE_ENTRIES_X86_64 - 1) )
+        {
+            l1tab = NULL;
+            if ( l2off == (L2_PAGETABLE_ENTRIES_X86_64 - 1) )
+            {
+                l2tab = NULL;
+                if ( l3off == (L3_PAGETABLE_ENTRIES_X86_64 - 1) )
+                    l3tab = NULL;
+            }
+        }
+    }
+
     return 0;
 
 pfn_error:
@@ -442,6 +519,27 @@ pfn_error:
 static int alloc_p2m_list(struct xc_dom_image *dom)
 {
     size_t p2m_alloc_size = dom->p2m_size * dom->arch_hooks->sizeof_pfn;
+    xen_vaddr_t from, to;
+    xen_pfn_t tables;
+
+    p2m_alloc_size = round_pg(p2m_alloc_size);
+    if ( dom->parms.p2m_base != UNSET_ADDR )
+    {
+        /* Add space for page tables, 64 bit only. */
+        from = dom->parms.p2m_base;
+        to = from + p2m_alloc_size - 1;
+        tables = 0;
+        tables += nr_page_tables(dom, from, to, L4_PAGETABLE_SHIFT_X86_64);
+        if ( to > (xen_vaddr_t)(~0ULL << L4_PAGETABLE_SHIFT_X86_64) )
+            tables--;
+        tables += nr_page_tables(dom, from, to, L3_PAGETABLE_SHIFT_X86_64);
+        if ( to > (xen_vaddr_t)(~0ULL << L3_PAGETABLE_SHIFT_X86_64) )
+            tables--;
+        tables += nr_page_tables(dom, from, to, L2_PAGETABLE_SHIFT_X86_64);
+        if ( to > (xen_vaddr_t)(~0ULL << L2_PAGETABLE_SHIFT_X86_64) )
+            tables--;
+        p2m_alloc_size += tables << PAGE_SHIFT_X86;
+    }
 
     /* allocate phys2mach table */
     if ( xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach",
@@ -541,6 +639,12 @@ static int start_info_x86_64(struct xc_dom_image *dom)
     start_info->pt_base = dom->pgtables_seg.vstart;
     start_info->nr_pt_frames = dom->pgtables;
     start_info->mfn_list = dom->p2m_seg.vstart;
+    if ( dom->parms.p2m_base != UNSET_ADDR )
+    {
+        start_info->first_p2m_pfn = dom->p2m_seg.pfn;
+        start_info->nr_p2m_frames =
+            (dom->p2m_seg.vend - dom->p2m_seg.vstart) >> PAGE_SHIFT_X86;
+    }
 
     start_info->flags = dom->flags;
     start_info->store_mfn = xc_dom_p2m_guest(dom, dom->xenstore_pfn);
@@ -680,6 +784,7 @@ static struct xc_dom_arch xc_dom_32_pae = {
     .native_protocol = XEN_IO_PROTO_ABI_X86_32,
     .page_shift = PAGE_SHIFT_X86,
     .sizeof_pfn = 4,
+    .p2m_base_supported = 0,
     .alloc_magic_pages = alloc_magic_pages,
     .alloc_p2m_list = alloc_p2m_list,
     .count_pgtables = count_pgtables_x86_32_pae,
@@ -694,6 +799,7 @@ static struct xc_dom_arch xc_dom_64 = {
     .native_protocol = XEN_IO_PROTO_ABI_X86_64,
     .page_shift = PAGE_SHIFT_X86,
     .sizeof_pfn = 8,
+    .p2m_base_supported = 1,
     .alloc_magic_pages = alloc_magic_pages,
     .alloc_p2m_list = alloc_p2m_list,
     .count_pgtables = count_pgtables_x86_64,
@@ -1025,7 +1131,10 @@ int arch_setup_bootlate(struct xc_dom_image *dom)
     if ( !xc_dom_feature_translated(dom) )
     {
         /* paravirtualized guest */
+
+        /* Drop references to all initial page tables before pinning. */
         xc_dom_unmap_one(dom, dom->pgtables_seg.pfn);
+        xc_dom_unmap_one(dom, dom->p2m_seg.pfn);
         rc = pin_table(dom->xch, pgd_type,
                        xc_dom_p2m_host(dom, dom->pgtables_seg.pfn),
                        dom->guest_domid);
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.