Re: [Xen-devel] [PATCH] x86/NUMA: make init_node_heap() respect Xen heap limit
CC George (x86 MM maintainer)
On Thu, Aug 27, 2015 at 02:37:17AM -0600, Jan Beulich wrote:
> On NUMA systems, where we try to use node local memory for the basic
> control structures of the buddy allocator, this special case needs to
> take into consideration a possible address width limit placed on the
> Xen heap. In turn this (but also other, more abstract considerations)
> requires that xenheap_max_mfn() not be called more than once (at most
> we might permit it to be called a second time with a larger value than
> was passed the first time), and be called only before calling
> end_boot_allocator().
>
> While inspecting all the involved code, a couple of off-by-one issues
> were found (and are being corrected here at once):
> - arch_init_memory() cleared one too many page table slots
> - the highmem_start based invocation of xenheap_max_mfn() passed too
> big a value
> - xenheap_max_mfn() calculated the wrong bit count in edge cases
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -369,7 +369,7 @@ void __init arch_init_memory(void)
>
> for ( i = 0; i < l3_table_offset(split_va); ++i )
> l3tab[i] = l3idle[i];
> - for ( ; i <= L3_PAGETABLE_ENTRIES; ++i )
> + for ( ; i < L3_PAGETABLE_ENTRIES; ++i )
> l3tab[i] = l3e_empty();
> split_l4e = l4e_from_pfn(virt_to_mfn(l3tab),
> __PAGE_HYPERVISOR);
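
With L3_PAGETABLE_ENTRIES being 512, valid slot indices run 0..511, so the
old <= bound cleared one entry past the end of the table; the new < bound
stops at the last valid slot.
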
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -1002,7 +1002,7 @@ void __init noreturn __start_xen(unsigne
>
> setup_max_pdx(raw_max_page);
> if ( highmem_start )
> - xenheap_max_mfn(PFN_DOWN(highmem_start));
> + xenheap_max_mfn(PFN_DOWN(highmem_start - 1));
>
> /*
> * Walk every RAM region and map it in its entirety (on x86/64, at least)
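
The off-by-one here is easiest to see with concrete numbers. A minimal
standalone sketch (the 4GiB value for highmem_start is just an illustrative
assumption; PFN_DOWN is the usual (addr) >> PAGE_SHIFT):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PFN_DOWN(pa) ((pa) >> PAGE_SHIFT)

    int main(void)
    {
        unsigned long long highmem_start = 1ULL << 32; /* 4GiB limit */

        /* Old argument: 0x100000, the first MFN at/above the limit. */
        printf("old: %#llx\n", PFN_DOWN(highmem_start));
        /* New argument: 0xfffff, the last MFN still below the limit. */
        printf("new: %#llx\n", PFN_DOWN(highmem_start - 1));
        return 0;
    }

Since xenheap_max_mfn() wants the highest usable MFN, passing
PFN_DOWN(highmem_start) overshot by one page.
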
> @@ -1183,9 +1183,6 @@ void __init noreturn __start_xen(unsigne
>
> numa_initmem_init(0, raw_max_page);
>
> - end_boot_allocator();
> - system_state = SYS_STATE_boot;
> -
> if ( max_page - 1 > virt_to_mfn(HYPERVISOR_VIRT_END - 1) )
> {
> unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1);
> @@ -1194,6 +1191,8 @@ void __init noreturn __start_xen(unsigne
> if ( !highmem_start )
> xenheap_max_mfn(limit);
>
> + end_boot_allocator();
> +
> /* Pass the remaining memory to the allocator. */
> for ( i = 0; i < boot_e820.nr_map; i++ )
> {
> @@ -1217,6 +1216,10 @@ void __init noreturn __start_xen(unsigne
> opt_tmem = 0;
> }
> }
> + else
> + end_boot_allocator();
> +
> + system_state = SYS_STATE_boot;
>
> vm_init();
> console_init_ring();
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -404,13 +404,19 @@ void get_outstanding_claims(uint64_t *fr
> spin_unlock(&heap_lock);
> }
>
> +static bool_t __read_mostly first_node_initialised;
> +#ifndef CONFIG_SEPARATE_XENHEAP
> +static unsigned int __read_mostly xenheap_bits;
> +#else
> +#define xenheap_bits 0
> +#endif
> +
> static unsigned long init_node_heap(int node, unsigned long mfn,
> unsigned long nr, bool_t *use_tail)
> {
> /* First node to be discovered has its heap metadata statically alloced.
> */
> static heap_by_zone_and_order_t _heap_static;
> static unsigned long avail_static[NR_ZONES];
> - static int first_node_initialised;
> unsigned long needed = (sizeof(**_heap) +
> sizeof(**avail) * NR_ZONES +
> PAGE_SIZE - 1) >> PAGE_SHIFT;
> @@ -428,14 +434,18 @@ static unsigned long init_node_heap(int
> }
> #ifdef DIRECTMAP_VIRT_END
> else if ( *use_tail && nr >= needed &&
> - (mfn + nr) <= (virt_to_mfn(eva - 1) + 1) )
> + (mfn + nr) <= (virt_to_mfn(eva - 1) + 1) &&
> + (!xenheap_bits ||
> + !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT))) )
> {
> _heap[node] = mfn_to_virt(mfn + nr - needed);
> avail[node] = mfn_to_virt(mfn + nr - 1) +
> PAGE_SIZE - sizeof(**avail) * NR_ZONES;
> }
> else if ( nr >= needed &&
> - (mfn + needed) <= (virt_to_mfn(eva - 1) + 1) )
> + (mfn + needed) <= (virt_to_mfn(eva - 1) + 1) &&
> + (!xenheap_bits ||
> + !((mfn + needed - 1) >> (xenheap_bits - PAGE_SHIFT))) )
> {
> _heap[node] = mfn_to_virt(mfn);
> avail[node] = mfn_to_virt(mfn + needed - 1) +
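
Two notes on this hunk. Defining xenheap_bits to 0 in the
CONFIG_SEPARATE_XENHEAP case (above) lets the compiler drop the new
!xenheap_bits || ... checks entirely in that configuration. And both checks
test the same condition: the last MFN of the candidate metadata range must
still lie below the 1 << xenheap_bits byte boundary, i.e. shifting it right
by (xenheap_bits - PAGE_SHIFT) must leave zero. A sketch of the predicate in
isolation (mfn_range_fits is a made-up name, not from the patch):

    #include <stdbool.h>

    #define PAGE_SHIFT 12

    /* True when MFNs [mfn, mfn + nr) lie entirely below 1 << bits
     * bytes; bits == 0 means "no limit", as in the patch. */
    static bool mfn_range_fits(unsigned long mfn, unsigned long nr,
                               unsigned int bits)
    {
        return !bits || !((mfn + nr - 1) >> (bits - PAGE_SHIFT));
    }
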
> @@ -1543,11 +1553,13 @@ void free_xenheap_pages(void *v, unsigne
>
> #else
>
> -static unsigned int __read_mostly xenheap_bits;
> -
> void __init xenheap_max_mfn(unsigned long mfn)
> {
> - xenheap_bits = flsl(mfn) + PAGE_SHIFT;
> + ASSERT(!first_node_initialised);
> + ASSERT(!xenheap_bits);
> + BUILD_BUG_ON(PADDR_BITS >= BITS_PER_LONG);
> + xenheap_bits = min(flsl(mfn + 1) - 1 + PAGE_SHIFT, PADDR_BITS);
> + printk(XENLOG_INFO "Xen heap: %u bits\n", xenheap_bits);
> }
>
> void init_xenheap_pages(paddr_t ps, paddr_t pe)
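
Combined with the setup.c change, the arithmetic now comes out right at
power-of-two boundaries. A small standalone check, assuming flsl() is the
usual 1-based find-last-set (flsl(1) == 1) and a 64-bit unsigned long:

    #include <assert.h>

    #define PAGE_SHIFT 12

    /* 1-based index of the most significant set bit; 0 for x == 0. */
    static int flsl(unsigned long x)
    {
        return x ? 64 - (int)__builtin_clzl(x) : 0;
    }

    int main(void)
    {
        /* highmem_start = 4GiB: the highest usable MFN is 0xfffff. */
        unsigned long mfn = (1UL << 20) - 1;

        /* Old code with the old caller argument (mfn + 1): 33 bits,
         * one too many for addresses below 1 << 32. */
        assert(flsl(mfn + 1) + PAGE_SHIFT == 33);

        /* New code with the new argument: exactly 32 bits. */
        assert(flsl(mfn + 1) - 1 + PAGE_SHIFT == 32);
        return 0;
    }
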
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel