[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] Re: [PATCH 4/5] xen: allow extra memory to be in multiple regions



On Fri, Aug 19, 2011 at 03:57:19PM +0100, David Vrabel wrote:
> Allow the extra memory (used by the balloon driver) to be in multiple
> regions regions (typically two regions, one for low memory and one for
> high memory).  This allows the balloon driver to increase the number
> of available low pages (if the initial number if pages is small).
> 
> As a side effect, the algorithm for building the e820 memory map is
> simpler and more obviously correct as the map supplied by the
> hypervisor is (almost) used as is (in particular, all reserved regions
> and gaps are preserved).  Only RAM regions are altered and RAM regions
> above max_pfn + extra_pages are marked as unused (the region is split
> in two if necessary).
> 
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
> ---
>  arch/x86/xen/setup.c |  155 
> ++++++++++++++++++++++----------------------------
>  1 files changed, 67 insertions(+), 88 deletions(-)
> 
> diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
> index 4720c2d..93e4542 100644
> --- a/arch/x86/xen/setup.c
> +++ b/arch/x86/xen/setup.c
> @@ -51,23 +51,29 @@ struct xen_memory_region 
> xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
>   */
>  #define EXTRA_MEM_RATIO              (10)
>  
> -static void __init xen_add_extra_mem(unsigned long pages)
> +static void __init xen_add_extra_mem(u64 extra_start, u64 size)
>  {
>       unsigned long pfn;
> +     int i;
>  
> -     u64 size = (u64)pages * PAGE_SIZE;
> -     u64 extra_start = xen_extra_mem[0].start + xen_extra_mem[0].size;
> -
> -     if (!pages)
> -             return;
> -
> -     e820_add_region(extra_start, size, E820_RAM);
> -     sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
> +     for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
> +             /* Add new region. */
> +             if (xen_extra_mem[i].size == 0) {
> +                     xen_extra_mem[i].start = extra_start;
> +                     xen_extra_mem[i].size  = size;
> +                     break;
> +             }
> +             /* Append to existing region. */
> +             if (xen_extra_mem[i].start + xen_extra_mem[i].size == 
> extra_start) {
> +                     xen_extra_mem[i].size += size;
> +                     break;
> +             }
> +     }
> +     if (i == XEN_EXTRA_MEM_MAX_REGIONS)
> +             printk(KERN_WARNING "Warning: not enough extra memory 
> regions\n");
>  
>       memblock_x86_reserve_range(extra_start, extra_start + size, "XEN 
> EXTRA");
>  
> -     xen_extra_mem[0].size += size;
> -
>       xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
>  
>       for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
> @@ -118,7 +124,8 @@ static unsigned long __init xen_release_chunk(phys_addr_t 
> start_addr,
>  }
>  
>  static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
> -                                                  const struct e820map *e820)
> +                                                  const struct e820entry 
> *map,
> +                                                  int nr_map)
>  {
>       phys_addr_t max_addr = PFN_PHYS(max_pfn);
>       phys_addr_t last_end = ISA_END_ADDRESS;
> @@ -126,13 +133,13 @@ static unsigned long __init 
> xen_return_unused_memory(unsigned long max_pfn,
>       int i;
>  
>       /* Free any unused memory above the low 1Mbyte. */
> -     for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
> -             phys_addr_t end = e820->map[i].addr;
> +     for (i = 0; i < nr_map && last_end < max_addr; i++) {
> +             phys_addr_t end = map[i].addr;
>               end = min(max_addr, end);
>  
>               if (last_end < end)
>                       released += xen_release_chunk(last_end, end);
> -             last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
> +             last_end = max(last_end, map[i].addr + map[i].size);
>       }
>  
>       if (last_end < max_addr)
> @@ -203,14 +210,13 @@ static unsigned long __init xen_get_max_pages(void)
>  char * __init xen_memory_setup(void)
>  {
>       static struct e820entry map[E820MAX] __initdata;
> -     static struct e820entry map_raw[E820MAX] __initdata;
>  
>       unsigned long max_pfn = xen_start_info->nr_pages;
>       unsigned long long mem_end;
>       int rc;
>       struct xen_memory_map memmap;
> +     unsigned long max_pages;
>       unsigned long extra_pages = 0;
> -     unsigned long extra_limit;
>       unsigned long identity_pages = 0;
>       int i;
>       int op;
> @@ -237,49 +243,52 @@ char * __init xen_memory_setup(void)
>       }
>       BUG_ON(rc);
>  
> -     memcpy(map_raw, map, sizeof(map));
> -     e820.nr_map = 0;
> -     xen_extra_mem[0].start = mem_end;
> -     for (i = 0; i < memmap.nr_entries; i++) {
> -             unsigned long long end;
> -
> -             /* Guard against non-page aligned E820 entries. */
> -             if (map[i].type == E820_RAM)
> -                     map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
> -
> -             end = map[i].addr + map[i].size;
> -             if (map[i].type == E820_RAM && end > mem_end) {
> -                     /* RAM off the end - may be partially included */
> -                     u64 delta = min(map[i].size, end - mem_end);
> -
> -                     map[i].size -= delta;
> -                     end -= delta;
> -
> -                     extra_pages += PFN_DOWN(delta);
> -                     /*
> -                      * Set RAM below 4GB that is not for us to be unusable.
> -                      * This prevents "System RAM" address space from being
> -                      * used as potential resource for I/O address (happens
> -                      * when 'allocate_resource' is called).
> -                      */
> -                     if (delta &&
> -                             (xen_initial_domain() && end < 0x100000000ULL))
> -                             e820_add_region(end, delta, E820_UNUSABLE);
> +     /* Make sure the Xen-supplied memory map is well-ordered. */
> +     sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
> +
> +     max_pages = xen_get_max_pages();
> +     if (max_pages > max_pfn)
> +             extra_pages += max_pages - max_pfn;
> +
> +     extra_pages += xen_return_unused_memory(max_pfn, map, 
> memmap.nr_entries);
> +
> +     /*
> +      * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
> +      * factor the base size.  On non-highmem systems, the base
> +      * size is the full initial memory allocation; on highmem it
> +      * is limited to the max size of lowmem, so that it doesn't
> +      * get completely filled.
> +      *
> +      * In principle there could be a problem in lowmem systems if
> +      * the initial memory is also very large with respect to
> +      * lowmem, but we won't try to deal with that here.
> +      */
> +     extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), 
> extra_pages);
> +
> +     i = 0;
> +     while (i < memmap.nr_entries) {
> +             u64 addr = map[i].addr;
> +             u64 size = map[i].size;
> +             u32 type = map[i].type;
> +
> +             if (type == E820_RAM) {
> +                     if (addr < mem_end) {
> +                             size = min(size, mem_end - addr);

You removed this little alignmend issue we found on Dell Inspiron laptops:

                /* Guard against non-page aligned E820 entries. */
                if (map[i].type == E820_RAM)
                        map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;

                end = map[i].addr + map[i].size;

Without it we end up dying later on when NUMA was turned on as it tried
to use two pages and "half" of the last page was not RAM and it ended up
going belly up. We need to make sure that the "end" (and the "beginning")
of the E820 entry is page aligned.

> +                     } else if (extra_pages) {
> +                             size = min(size, (u64)extra_pages * PAGE_SIZE);
> +                             extra_pages -= size / PAGE_SIZE;

Those operations can be done using << and >> (using PAGE_SHIFT) respectivly.

> +                             xen_add_extra_mem(addr, size);
> +                     } else
> +                             type = E820_UNUSABLE;
>               }
>  
> -             if (map[i].size > 0 && end > xen_extra_mem[0].start)
> -                     xen_extra_mem[0].start = end;
> +             e820_add_region(addr, size, type);
>  
> -             /* Add region if any remains */
> -             if (map[i].size > 0)
> -                     e820_add_region(map[i].addr, map[i].size, map[i].type);
> +             map[i].addr += size;
> +             map[i].size -= size;
> +             if (map[i].size == 0)
> +                     i++;
>       }
> -     /* Align the balloon area so that max_low_pfn does not get set
> -      * to be at the _end_ of the PCI gap at the far end (fee01000).
> -      * Note that the start of balloon area gets set in the loop above
> -      * to be past the last E820 region. */
> -     if (xen_initial_domain() && (xen_extra_mem[0].start < (1ULL<<32)))
> -             xen_extra_mem[0].start = (1ULL<<32);

I think this issue is not present with your patch - but just to be
on the safe side you might want to ask Stefano to use his box where
he found the issue originally.

>  
>       /*
>        * In domU, the ISA region is normal, usable memory, but we
> @@ -305,41 +314,11 @@ char * __init xen_memory_setup(void)
>  
>       sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
>  
> -     extra_limit = xen_get_max_pages();
> -     if (extra_limit >= max_pfn)
> -             extra_pages = extra_limit - max_pfn;
> -     else
> -             extra_pages = 0;
> -
> -     extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, 
> &e820);
> -
> -     /*
> -      * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
> -      * factor the base size.  On non-highmem systems, the base
> -      * size is the full initial memory allocation; on highmem it
> -      * is limited to the max size of lowmem, so that it doesn't
> -      * get completely filled.
> -      *
> -      * In principle there could be a problem in lowmem systems if
> -      * the initial memory is also very large with respect to
> -      * lowmem, but we won't try to deal with that here.
> -      */
> -     extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
> -                       max_pfn + extra_pages);
> -
> -     if (extra_limit >= max_pfn)
> -             extra_pages = extra_limit - max_pfn;
> -     else
> -             extra_pages = 0;
> -
> -     xen_add_extra_mem(extra_pages);
> -
>       /*
>        * Set P2M for all non-RAM pages and E820 gaps to be identity
> -      * type PFNs. We supply it with the non-sanitized version
> -      * of the E820.
> +      * type PFNs.
>        */
> -     identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
> +     identity_pages = xen_set_identity(e820.map, e820.nr_map);
>       printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
>       return "Xen";
>  }
> -- 
> 1.7.2.5

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.