[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 12/11] x86: debugging code for testing 16Tb support on smaller memory systems


  • To: "xen-devel" <xen-devel@xxxxxxxxxxxxx>
  • From: "Jan Beulich" <JBeulich@xxxxxxxx>
  • Date: Tue, 22 Jan 2013 10:58:53 +0000
  • Delivery-date: Tue, 22 Jan 2013 10:59:09 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>

DO NOT APPLY AS IS.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -66,8 +66,10 @@ void *map_domain_page(unsigned long mfn)
     struct mapcache_vcpu *vcache;
     struct vcpu_maphash_entry *hashent;
 
+#ifdef NDEBUG
     if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
         return mfn_to_virt(mfn);
+#endif
 
     v = mapcache_current_vcpu();
     if ( !v || is_hvm_vcpu(v) )
@@ -139,6 +141,14 @@ void *map_domain_page(unsigned long mfn)
                 if ( ++i == MAPHASH_ENTRIES )
                     i = 0;
             } while ( i != MAPHASH_HASHFN(mfn) );
+if(idx >= dcache->entries) {//temp
+ mapcache_domain_dump(v->domain);
+ for(i = 0; i < ARRAY_SIZE(vcache->hash); ++i) {
+  hashent = &vcache->hash[i];
+  if(hashent->idx != MAPHASHENT_NOTINUSE)
+   printk("vc[%u]: ref=%u idx=%04x mfn=%08lx\n", i, hashent->refcnt,
+          hashent->idx, hashent->mfn);
+ }
+}
         }
         BUG_ON(idx >= dcache->entries);
 
@@ -249,8 +259,10 @@ int mapcache_domain_init(struct domain *
     if ( is_hvm_domain(d) || is_idle_domain(d) )
         return 0;
 
+#ifdef NDEBUG
     if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
         return 0;
+#endif
 
     dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1);
     d->arch.perdomain_l2_pg[MAPCACHE_SLOT] = alloc_domheap_page(NULL, memf);
@@ -418,8 +430,10 @@ void *map_domain_page_global(unsigned lo
 
     ASSERT(!in_irq() && local_irq_is_enabled());
 
+#ifdef NDEBUG
     if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
         return mfn_to_virt(mfn);
+#endif
 
     spin_lock(&globalmap_lock);
 
@@ -497,3 +511,26 @@ unsigned long domain_page_map_to_mfn(con
 
     return l1e_get_pfn(*pl1e);
 }
+
+/* Debug helper (//temp): dump a PV domain's mapcache state to the console. */
+void mapcache_domain_dump(struct domain *d) {//temp
+ unsigned i, n = 0;
+ const struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+ const struct vcpu *v;
+
+ /* Only PV, non-idle domains have a mapcache. */
+ if(is_hvm_domain(d) || is_idle_domain(d))
+  return;
+ /* Count in-use per-vCPU hash entries across all vCPUs. */
+ for_each_vcpu(d, v) {
+  const struct mapcache_vcpu *vcache = &v->arch.pv_vcpu.mapcache;
+  for(i = 0; i < ARRAY_SIZE(vcache->hash); ++i)
+   n += (vcache->hash[i].idx != MAPHASHENT_NOTINUSE);
+ }
+ printk("Dom%d mc (#=%u v=%u) [%p]:\n", d->domain_id, n, d->max_vcpus,
+        __builtin_return_address(0));
+ /* Raw in-use and garbage bitmaps, one long per line. */
+ for(i = 0; i < BITS_TO_LONGS(dcache->entries); ++i)
+  printk("dcu[%02x]: %016lx\n", i, dcache->inuse[i]);
+ for(i = 0; i < BITS_TO_LONGS(dcache->entries); ++i)
+  printk("dcg[%02x]: %016lx\n", i, dcache->garbage[i]);
+ /* Dump entries that are live (in use and not garbage) or still mapped. */
+ for(i = 0; i < dcache->entries; ++i) {
+  l1_pgentry_t l1e = DCACHE_L1ENT(dcache, i);
+
+  if((test_bit(i, dcache->inuse) && !test_bit(i, dcache->garbage)) ||
+     (l1e_get_flags(l1e) & _PAGE_PRESENT))
+   printk("dc[%04x]: %"PRIpte"\n", i, l1e_get_intpte(l1e));
+ }
+}
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -250,6 +250,14 @@ void __init init_frametable(void)
         init_spagetable();
 }
 
+#ifndef NDEBUG
+static unsigned int __read_mostly root_pgt_pv_xen_slots
+    = ROOT_PAGETABLE_PV_XEN_SLOTS;
+static l4_pgentry_t __read_mostly split_l4e;
+#else
+#define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS
+#endif
+
 void __init arch_init_memory(void)
 {
     unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn;
@@ -344,6 +352,41 @@ void __init arch_init_memory(void)
     efi_init_memory();
 
     mem_sharing_init();
+
+#ifndef NDEBUG
+    if ( split_gb )
+    {
+        paddr_t split_pa = split_gb * GB(1);
+        unsigned long split_va = (unsigned long)__va(split_pa);
+
+        if ( split_va < HYPERVISOR_VIRT_END &&
+             split_va - 1 == (unsigned long)__va(split_pa - 1) )
+        {
+            root_pgt_pv_xen_slots = l4_table_offset(split_va) -
+                                    ROOT_PAGETABLE_FIRST_XEN_SLOT;
+            ASSERT(root_pgt_pv_xen_slots < ROOT_PAGETABLE_PV_XEN_SLOTS);
+            if ( l4_table_offset(split_va) == l4_table_offset(split_va - 1) )
+            {
+                l3_pgentry_t *l3tab = alloc_xen_pagetable();
+
+                if ( l3tab )
+                {
+                    const l3_pgentry_t *l3idle =
+                        l4e_to_l3e(idle_pg_table[l4_table_offset(split_va)]);
+
+                    for ( i = 0; i < l3_table_offset(split_va); ++i )
+                        l3tab[i] = l3idle[i];
+                    for ( ; i < L3_PAGETABLE_ENTRIES; ++i )
+                        l3tab[i] = l3e_empty();
+                    split_l4e = l4e_from_pfn(virt_to_mfn(l3tab),
+                                             __PAGE_HYPERVISOR);
+                }
+                else
+                    ++root_pgt_pv_xen_slots;
+            }
+        }
+    }
+#endif
 }
 
 int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
@@ -1320,7 +1363,12 @@ void init_guest_l4_table(l4_pgentry_t l4
     /* Xen private mappings. */
     memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT],
            &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
-           ROOT_PAGETABLE_PV_XEN_SLOTS * sizeof(l4_pgentry_t));
+           root_pgt_pv_xen_slots * sizeof(l4_pgentry_t));
+#ifndef NDEBUG
+    if ( l4e_get_intpte(split_l4e) )
+        l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT + root_pgt_pv_xen_slots] =
+            split_l4e;
+#endif
     l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -82,6 +82,11 @@ boolean_param("noapic", skip_ioapic_setu
 s8 __read_mostly xen_cpuidle = -1;
 boolean_param("cpuidle", xen_cpuidle);
 
+#ifndef NDEBUG
+unsigned int __initdata split_gb;
+integer_param("split-gb", split_gb);
+#endif
+
 cpumask_t __read_mostly cpu_present_map;
 
 unsigned long __read_mostly xen_phys_start;
@@ -789,6 +794,11 @@ void __init __start_xen(unsigned long mb
     modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end);
     bootstrap_map(NULL);
 
+#ifndef split_gb /* Don't allow split below 4Gb. */
+    if ( split_gb < 4 )
+        split_gb = 0;
+#endif
+
     for ( i = boot_e820.nr_map-1; i >= 0; i-- )
     {
         uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
@@ -917,6 +927,9 @@ void __init __start_xen(unsigned long mb
             /* Don't overlap with other modules. */
             end = consider_modules(s, e, size, mod, mbi->mods_count, j);
 
+            if ( split_gb && end > split_gb * GB(1) )
+                continue;
+
             if ( s < end &&
                  (headroom ||
                   ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) )
@@ -958,6 +971,8 @@ void __init __start_xen(unsigned long mb
     kexec_reserve_area(&boot_e820);
 
     setup_max_pdx();
+    if ( split_gb )
+        xenheap_max_mfn(split_gb << (30 - PAGE_SHIFT));
 
     /*
      * Walk every RAM region and map it in its entirety (on x86/64, at least)
@@ -1129,7 +1144,8 @@ void __init __start_xen(unsigned long mb
         unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1);
         uint64_t mask = PAGE_SIZE - 1;
 
-        xenheap_max_mfn(limit);
+        if ( !split_gb )
+            xenheap_max_mfn(limit);
 
         /* Pass the remaining memory to the allocator. */
         for ( i = 0; i < boot_e820.nr_map; i++ )
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -45,6 +45,7 @@
 #include <asm/flushtlb.h>
 #ifdef CONFIG_X86
 #include <asm/p2m.h>
+#include <asm/setup.h> /* for split_gb only */
 #else
 #define p2m_pod_offline_or_broken_hit(pg) 0
 #define p2m_pod_offline_or_broken_replace(pg) BUG_ON(pg != NULL)
@@ -203,6 +204,25 @@ unsigned long __init alloc_boot_pages(
         pg = (r->e - nr_pfns) & ~(pfn_align - 1);
         if ( pg < r->s )
             continue;
+
+#if defined(CONFIG_X86) && !defined(NDEBUG)
+        /*
+         * Filtering pfn_align == 1 since the only allocations using a bigger
+         * alignment are the ones used for setting up the frame table chunks.
+         * Those allocations get remapped anyway, i.e. them not having 1:1
+         * mappings always accessible is not a problem.
+         */
+        if ( split_gb && pfn_align == 1 &&
+             r->e > (split_gb << (30 - PAGE_SHIFT)) )
+        {
+            pg = r->s;
+            if ( pg + nr_pfns > (split_gb << (30 - PAGE_SHIFT)) )
+                continue;
+            r->s = pg + nr_pfns;
+            return pg;
+        }
+#endif
+
         _e = r->e;
         r->e = pg;
         bootmem_region_add(pg + nr_pfns, _e);
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -72,6 +72,7 @@ struct mapcache_domain {
 
 int mapcache_domain_init(struct domain *);
 void mapcache_domain_exit(struct domain *);
+void mapcache_domain_dump(struct domain *);//temp
 int mapcache_vcpu_init(struct vcpu *);
 void mapcache_override_current(struct vcpu *);
 
--- a/xen/include/asm-x86/setup.h
+++ b/xen/include/asm-x86/setup.h
@@ -43,4 +43,10 @@ void microcode_grab_module(
 
 extern uint8_t kbd_shift_flags;
 
+#ifdef NDEBUG
+# define split_gb 0
+#else
+extern unsigned int split_gb;
+#endif
+
 #endif


Attachment: x86-map-domain-debug.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.