[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 11/11] x86: support up to 16Tb


  • To: "xen-devel" <xen-devel@xxxxxxxxxxxxx>
  • From: "Jan Beulich" <JBeulich@xxxxxxxx>
  • Date: Tue, 22 Jan 2013 10:57:49 +0000
  • Delivery-date: Tue, 22 Jan 2013 10:58:10 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>

This mainly involves adjusting the number of L4 entries needing copying
between page tables (which is now different between PV and HVM/idle
domains), and changing the cutoff point and method when more than the
supported amount of memory is found in a system.

Since TMEM doesn't currently cope with the full 1:1 map not always
being visible, it gets forcefully disabled in that case.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/efi/boot.c
+++ b/xen/arch/x86/efi/boot.c
@@ -1591,7 +1591,7 @@ void __init efi_init_memory(void)
 
     /* Insert Xen mappings. */
     for ( i = l4_table_offset(HYPERVISOR_VIRT_START);
-          i < l4_table_offset(HYPERVISOR_VIRT_END); ++i )
+          i < l4_table_offset(DIRECTMAP_VIRT_END); ++i )
         efi_l4_pgtable[i] = idle_pg_table[i];
 #endif
 }
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1320,7 +1320,7 @@ void init_guest_l4_table(l4_pgentry_t l4
     /* Xen private mappings. */
     memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT],
            &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
-           ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
+           ROOT_PAGETABLE_PV_XEN_SLOTS * sizeof(l4_pgentry_t));
     l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -25,6 +25,7 @@
 #include <xen/dmi.h>
 #include <xen/pfn.h>
 #include <xen/nodemask.h>
+#include <xen/tmem_xen.h> /* for opt_tmem only */
 #include <public/version.h>
 #include <compat/platform.h>
 #include <compat/xen.h>
@@ -381,6 +382,11 @@ static void __init setup_max_pdx(void)
     if ( max_pdx > FRAMETABLE_NR )
         max_pdx = FRAMETABLE_NR;
 
+#ifdef PAGE_LIST_NULL
+    if ( max_pdx >= PAGE_LIST_NULL )
+        max_pdx = PAGE_LIST_NULL - 1;
+#endif
+
     max_page = pdx_to_pfn(max_pdx - 1) + 1;
 }
 
@@ -1031,9 +1037,23 @@ void __init __start_xen(unsigned long mb
         /* Create new mappings /before/ passing memory to the allocator. */
         if ( map_e < e )
         {
-            map_pages_to_xen((unsigned long)__va(map_e), map_e >> PAGE_SHIFT,
-                             (e - map_e) >> PAGE_SHIFT, PAGE_HYPERVISOR);
-            init_boot_pages(map_e, e);
+            uint64_t limit = __pa(HYPERVISOR_VIRT_END - 1) + 1;
+            uint64_t end = min(e, limit);
+
+            if ( map_e < end )
+            {
+                map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e),
+                                 PFN_DOWN(end - map_e), PAGE_HYPERVISOR);
+                init_boot_pages(map_e, end);
+                map_e = end;
+            }
+        }
+        if ( map_e < e )
+        {
+            /* This range must not be passed to the boot allocator and
+             * must also not be mapped with _PAGE_GLOBAL. */
+            map_pages_to_xen((unsigned long)__va(map_e), PFN_DOWN(map_e),
+                             PFN_DOWN(e - map_e), __PAGE_HYPERVISOR);
         }
         if ( s < map_s )
         {
@@ -1104,6 +1124,34 @@ void __init __start_xen(unsigned long mb
     end_boot_allocator();
     system_state = SYS_STATE_boot;
 
+    if ( max_page - 1 > virt_to_mfn(HYPERVISOR_VIRT_END - 1) )
+    {
+        unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1);
+        uint64_t mask = PAGE_SIZE - 1;
+
+        xenheap_max_mfn(limit);
+
+        /* Pass the remaining memory to the allocator. */
+        for ( i = 0; i < boot_e820.nr_map; i++ )
+        {
+            uint64_t s, e;
+
+            s = (boot_e820.map[i].addr + mask) & ~mask;
+            e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
+            if ( PFN_DOWN(e) <= limit )
+                continue;
+            if ( PFN_DOWN(s) <= limit )
+                s = pfn_to_paddr(limit + 1);
+            init_domheap_pages(s, e);
+        }
+
+        if ( opt_tmem )
+        {
+           printk(XENLOG_WARNING "Forcing TMEM off\n");
+           opt_tmem = 0;
+        }
+    }
+
     vm_init();
     vesa_init();
 
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1471,10 +1471,23 @@ int memory_add(unsigned long spfn, unsig
         return -EINVAL;
     }
 
-    ret =  map_pages_to_xen((unsigned long)mfn_to_virt(spfn), spfn,
-                            epfn - spfn, PAGE_HYPERVISOR);
-     if ( ret )
-        return ret;
+    i = virt_to_mfn(HYPERVISOR_VIRT_END - 1) + 1;
+    if ( spfn < i )
+    {
+        ret = map_pages_to_xen((unsigned long)mfn_to_virt(spfn), spfn,
+                               min(epfn, i) - spfn, PAGE_HYPERVISOR);
+        if ( ret )
+            return ret;
+    }
+    if ( i < epfn )
+    {
+        if ( i < spfn )
+            i = spfn;
+        ret = map_pages_to_xen((unsigned long)mfn_to_virt(i), i,
+                               epfn - i, __PAGE_HYPERVISOR);
+        if ( ret )
+            return ret;
+    }
 
     old_node_start = NODE_DATA(node)->node_start_pfn;
     old_node_span = NODE_DATA(node)->node_spanned_pages;
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -255,6 +255,9 @@ static unsigned long init_node_heap(int 
     unsigned long needed = (sizeof(**_heap) +
                             sizeof(**avail) * NR_ZONES +
                             PAGE_SIZE - 1) >> PAGE_SHIFT;
+#ifdef DIRECTMAP_VIRT_END
+    unsigned long eva = min(DIRECTMAP_VIRT_END, HYPERVISOR_VIRT_END);
+#endif
     int i, j;
 
     if ( !first_node_initialised )
@@ -266,14 +269,14 @@ static unsigned long init_node_heap(int 
     }
 #ifdef DIRECTMAP_VIRT_END
     else if ( *use_tail && nr >= needed &&
-              (mfn + nr) <= (virt_to_mfn(DIRECTMAP_VIRT_END - 1) + 1) )
+              (mfn + nr) <= (virt_to_mfn(eva - 1) + 1) )
     {
         _heap[node] = mfn_to_virt(mfn + nr - needed);
         avail[node] = mfn_to_virt(mfn + nr - 1) +
                       PAGE_SIZE - sizeof(**avail) * NR_ZONES;
     }
     else if ( nr >= needed &&
-              (mfn + needed) <= (virt_to_mfn(DIRECTMAP_VIRT_END - 1) + 1) )
+              (mfn + needed) <= (virt_to_mfn(eva - 1) + 1) )
     {
         _heap[node] = mfn_to_virt(mfn);
         avail[node] = mfn_to_virt(mfn + needed - 1) +
@@ -1205,6 +1208,13 @@ void free_xenheap_pages(void *v, unsigne
 
 #else
 
+static unsigned int __read_mostly xenheap_bits;
+
+void __init xenheap_max_mfn(unsigned long mfn)
+{
+    xenheap_bits = fls(mfn) + PAGE_SHIFT - 1;
+}
+
 void init_xenheap_pages(paddr_t ps, paddr_t pe)
 {
     init_domheap_pages(ps, pe);
@@ -1217,6 +1227,11 @@ void *alloc_xenheap_pages(unsigned int o
 
     ASSERT(!in_irq());
 
+    if ( xenheap_bits && (memflags >> _MEMF_bits) > xenheap_bits )
+        memflags &= ~MEMF_bits(~0);
+    if ( !(memflags >> _MEMF_bits) )
+        memflags |= MEMF_bits(xenheap_bits);
+
     pg = alloc_domheap_pages(NULL, order, memflags);
     if ( unlikely(pg == NULL) )
         return NULL;
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -163,8 +163,12 @@ extern unsigned char boot_edid_info[128]
  *    Page-frame information array.
  *  0xffff830000000000 - 0xffff87ffffffffff [5TB, 5*2^40 bytes, PML4:262-271]
  *    1:1 direct mapping of all physical memory.
- *  0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
- *    Guest-defined use.
+ *  0xffff880000000000 - 0xffffffffffffffff [120TB,             PML4:272-511]
+ *    PV: Guest-defined use.
+ *  0xffff880000000000 - 0xffffff7fffffffff [119.5TB,           PML4:272-510]
+ *    HVM/idle: continuation of 1:1 mapping
+ *  0xffffff8000000000 - 0xffffffffffffffff [512GB, 2^39 bytes  PML4:511]
+ *    HVM/idle: unused
  *
  * Compatibility guest area layout:
  *  0x0000000000000000 - 0x00000000f57fffff [3928MB,            PML4:0]
@@ -183,6 +187,8 @@ extern unsigned char boot_edid_info[128]
 #define ROOT_PAGETABLE_FIRST_XEN_SLOT 256
 #define ROOT_PAGETABLE_LAST_XEN_SLOT  271
 #define ROOT_PAGETABLE_XEN_SLOTS \
+    (L4_PAGETABLE_ENTRIES - ROOT_PAGETABLE_FIRST_XEN_SLOT - 1)
+#define ROOT_PAGETABLE_PV_XEN_SLOTS \
     (ROOT_PAGETABLE_LAST_XEN_SLOT - ROOT_PAGETABLE_FIRST_XEN_SLOT + 1)
 
 /* Hypervisor reserves PML4 slots 256 to 271 inclusive. */
@@ -241,9 +247,9 @@ extern unsigned char boot_edid_info[128]
 #define FRAMETABLE_SIZE         GB(128)
 #define FRAMETABLE_NR           (FRAMETABLE_SIZE / sizeof(*frame_table))
 #define FRAMETABLE_VIRT_START   (FRAMETABLE_VIRT_END - FRAMETABLE_SIZE)
-/* Slot 262-271: A direct 1:1 mapping of all of physical memory. */
+/* Slot 262-271/510: A direct 1:1 mapping of all of physical memory. */
 #define DIRECTMAP_VIRT_START    (PML4_ADDR(262))
-#define DIRECTMAP_SIZE          (PML4_ENTRY_BYTES*10)
+#define DIRECTMAP_SIZE          (PML4_ENTRY_BYTES * (511 - 262))
 #define DIRECTMAP_VIRT_END      (DIRECTMAP_VIRT_START + DIRECTMAP_SIZE)
 
 #ifndef __ASSEMBLY__
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -43,6 +43,7 @@ void end_boot_allocator(void);
 
 /* Xen suballocator. These functions are interrupt-safe. */
 void init_xenheap_pages(paddr_t ps, paddr_t pe);
+void xenheap_max_mfn(unsigned long mfn);
 void *alloc_xenheap_pages(unsigned int order, unsigned int memflags);
 void free_xenheap_pages(void *v, unsigned int order);
 #define alloc_xenheap_page() (alloc_xenheap_pages(0,0))
@@ -111,7 +112,7 @@ struct page_list_head
 /* These must only have instances in struct page_info. */
 # define page_list_entry
 
-#define PAGE_LIST_NULL (~0)
+# define PAGE_LIST_NULL ((typeof(((struct page_info){}).list.next))~0)
 
 # if !defined(pdx_to_page) && !defined(page_to_pdx)
 #  if defined(__page_to_mfn) || defined(__mfn_to_page)


Attachment: x86-extend-RAM.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, which monitors our
servers 24x7x365, and is backed by RackSpace's Fanatical Support®.