x86: debugging code for testing 16Tb support on smaller memory systems Signed-off-by: Jan Beulich --- v2: Removed unwanted bits and switched to byte-granular "highmem-start" option. --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -546,6 +546,12 @@ Paging (HAP). ### hvm\_port80 > `= ` +### highmem-start +> `= <size>` + +Specify the memory boundary past which memory will be treated as highmem (x86 +debug hypervisor only). + ### idle\_latency\_factor > `= ` --- a/xen/arch/x86/domain_page.c +++ b/xen/arch/x86/domain_page.c @@ -66,8 +66,10 @@ void *map_domain_page(unsigned long mfn) struct mapcache_vcpu *vcache; struct vcpu_maphash_entry *hashent; +#ifdef NDEBUG if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) return mfn_to_virt(mfn); +#endif v = mapcache_current_vcpu(); if ( !v || is_hvm_vcpu(v) ) @@ -249,8 +251,10 @@ int mapcache_domain_init(struct domain * if ( is_hvm_domain(d) || is_idle_domain(d) ) return 0; +#ifdef NDEBUG if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) return 0; +#endif dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1); d->arch.perdomain_l2_pg[MAPCACHE_SLOT] = alloc_domheap_page(NULL, memf); @@ -418,8 +422,10 @@ void *map_domain_page_global(unsigned lo ASSERT(!in_irq() && local_irq_is_enabled()); +#ifdef NDEBUG if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) return mfn_to_virt(mfn); +#endif spin_lock(&globalmap_lock); --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -250,6 +250,14 @@ void __init init_frametable(void) init_spagetable(); } +#ifndef NDEBUG +static unsigned int __read_mostly root_pgt_pv_xen_slots + = ROOT_PAGETABLE_PV_XEN_SLOTS; +static l4_pgentry_t __read_mostly split_l4e; +#else +#define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS +#endif + void __init arch_init_memory(void) { unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn; @@ -344,6 +352,40 @@ void __init arch_init_memory(void) efi_init_memory();
mem_sharing_init(); + +#ifndef NDEBUG + if ( highmem_start ) + { + unsigned long split_va = (unsigned long)__va(highmem_start); + + if ( split_va < HYPERVISOR_VIRT_END && + split_va - 1 == (unsigned long)__va(highmem_start - 1) ) + { + root_pgt_pv_xen_slots = l4_table_offset(split_va) - + ROOT_PAGETABLE_FIRST_XEN_SLOT; + ASSERT(root_pgt_pv_xen_slots < ROOT_PAGETABLE_PV_XEN_SLOTS); + if ( l4_table_offset(split_va) == l4_table_offset(split_va - 1) ) + { + l3_pgentry_t *l3tab = alloc_xen_pagetable(); + + if ( l3tab ) + { + const l3_pgentry_t *l3idle = + l4e_to_l3e(idle_pg_table[l4_table_offset(split_va)]); + + for ( i = 0; i < l3_table_offset(split_va); ++i ) + l3tab[i] = l3idle[i]; + for ( ; i < L3_PAGETABLE_ENTRIES; ++i ) + l3tab[i] = l3e_empty(); /* leave the tail unmapped */ + split_l4e = l4e_from_pfn(virt_to_mfn(l3tab), + __PAGE_HYPERVISOR); + } + else + ++root_pgt_pv_xen_slots; + } + } + } +#endif } int page_is_ram_type(unsigned long mfn, unsigned long mem_type) @@ -1320,7 +1362,12 @@ void init_guest_l4_table(l4_pgentry_t l4 /* Xen private mappings.
*/ memcpy(&l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT], &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT], - ROOT_PAGETABLE_PV_XEN_SLOTS * sizeof(l4_pgentry_t)); + root_pgt_pv_xen_slots * sizeof(l4_pgentry_t)); +#ifndef NDEBUG + if ( l4e_get_intpte(split_l4e) ) + l4tab[ROOT_PAGETABLE_FIRST_XEN_SLOT + root_pgt_pv_xen_slots] = + split_l4e; +#endif l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] = l4e_from_pfn(domain_page_map_to_mfn(l4tab), __PAGE_HYPERVISOR); l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] = --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -82,6 +82,11 @@ boolean_param("noapic", skip_ioapic_setu s8 __read_mostly xen_cpuidle = -1; boolean_param("cpuidle", xen_cpuidle); +#ifndef NDEBUG +unsigned long __initdata highmem_start; +size_param("highmem-start", highmem_start); +#endif + cpumask_t __read_mostly cpu_present_map; unsigned long __read_mostly xen_phys_start; @@ -787,6 +792,14 @@ void __init __start_xen(unsigned long mb modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end); bootstrap_map(NULL); +#ifndef highmem_start + /* Don't allow split below 4Gb. */ + if ( highmem_start < GB(4) ) + highmem_start = 0; + else /* align to L3 entry boundary */ + highmem_start &= ~((1UL << L3_PAGETABLE_SHIFT) - 1); +#endif + for ( i = boot_e820.nr_map-1; i >= 0; i-- ) { uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1; @@ -915,6 +928,9 @@ void __init __start_xen(unsigned long mb /* Don't overlap with other modules. 
*/ end = consider_modules(s, e, size, mod, mbi->mods_count, j); + if ( highmem_start && end > highmem_start ) + continue; + if ( s < end && (headroom || ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) ) @@ -956,6 +972,8 @@ void __init __start_xen(unsigned long mb kexec_reserve_area(&boot_e820); setup_max_pdx(); + if ( highmem_start ) + xenheap_max_mfn(PFN_DOWN(highmem_start)); /* * Walk every RAM region and map it in its entirety (on x86/64, at least) @@ -1127,7 +1145,8 @@ void __init __start_xen(unsigned long mb unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1); uint64_t mask = PAGE_SIZE - 1; - xenheap_max_mfn(limit); + if ( !highmem_start ) + xenheap_max_mfn(limit); /* Pass the remaining memory to the allocator. */ for ( i = 0; i < boot_e820.nr_map; i++ ) --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -45,6 +45,7 @@ #include #ifdef CONFIG_X86 #include +#include <asm/setup.h> /* for highmem_start only */ #else #define p2m_pod_offline_or_broken_hit(pg) 0 #define p2m_pod_offline_or_broken_replace(pg) BUG_ON(pg != NULL) @@ -203,6 +204,25 @@ unsigned long __init alloc_boot_pages( pg = (r->e - nr_pfns) & ~(pfn_align - 1); if ( pg < r->s ) continue; + +#if defined(CONFIG_X86) && !defined(NDEBUG) + /* + * Filtering pfn_align == 1 since the only allocations using a bigger + * alignment are the ones used for setting up the frame table chunks. + * Those allocations get remapped anyway, i.e. them not having 1:1 + * mappings always accessible is not a problem.
+ */ + if ( highmem_start && pfn_align == 1 && + r->e > PFN_DOWN(highmem_start) ) + { + pg = r->s; + if ( pg + nr_pfns > PFN_DOWN(highmem_start) ) + continue; + r->s = pg + nr_pfns; + return pg; + } +#endif + _e = r->e; r->e = pg; bootmem_region_add(pg + nr_pfns, _e); --- a/xen/include/asm-x86/setup.h +++ b/xen/include/asm-x86/setup.h @@ -43,4 +43,10 @@ void microcode_grab_module( extern uint8_t kbd_shift_flags; +#ifdef NDEBUG +# define highmem_start 0 +#else +extern unsigned long highmem_start; +#endif + #endif