
[Xen-devel] RE: [PATCH 2/6] Change the boot allocator function to support memory add

>-----Original Message-----
>From: Keir Fraser [mailto:keir.fraser@xxxxxxxxxxxxx]
>Sent: 28 June 2009 17:50
>To: Jiang, Yunhong; Jan Beulich; Tim Deegan;
>xen-devel@xxxxxxxxxxxxxxxxxxx
>Subject: Re: [PATCH 2/6] Change the boot allocator function to
>support memory add
>
>On 28/06/2009 10:27, "Jiang, Yunhong" <yunhong.jiang@xxxxxxxxx> wrote:
>
>> This patch extends the boot allocator to support memory add.
>>
>> We take different approaches to the boot allocator bitmap on 32-bit and
>> 64-bit systems. On x86_32, we allocate a 512K bitmap up front, enough to
>> cover the whole 16G of supported memory. On x86_64, we don't change the
>> allocation at system boot; instead, we remap the allocator bitmap to a
>> pre-defined virtual address when hot-add happens.
>>
>> We also add several APIs to allocate boot pages from a given range.
>
>The underlying implementation ideas may be okay, but calling these
>extensions to the 'boot allocator' makes no sense. The boot allocator
>only exists during boot. The fact that the bitmap it uses persists
>beyond boot is an implementation detail. Perhaps you just need to
>change the name of your new allocator interface functions to something
>more suitable.

So can I understand the allocator bitmap (i.e. the one maintained by
map_free/map_alloc) as something that is not just about the boot allocator,
but a generic bitmap that remembers the allocation status of each page?
Maybe in future we can just use one bit in page_info to indicate whether a
page is allocated?
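
As background for why 16G needs a 512K bitmap: one bit per 4K page frame
gives (16G >> 12) / 8 = 512K. Below is a stand-alone sketch of the
bit-per-page idea, mirroring what allocated_in_map() in page_alloc.c does;
it is only an illustration with made-up names, not Xen code:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT    12
#define BITS_PER_LONG (8 * sizeof(unsigned long))

static unsigned long *bitmap;  /* one bit per page frame */

/* Equivalent of Xen's allocated_in_map(): test the bit for 'pfn'. */
static int page_is_allocated(unsigned long pfn)
{
    return !!(bitmap[pfn / BITS_PER_LONG] & (1UL << (pfn % BITS_PER_LONG)));
}

int main(void)
{
    unsigned long bytes = 16UL << 30;           /* 16G of physical memory */
    unsigned long pages = bytes >> PAGE_SHIFT;  /* => 4M 4K page frames   */
    unsigned long bmap_bytes = pages / 8;       /* one bit per frame      */

    printf("bitmap for 16G: %luK\n", bmap_bytes >> 10);  /* prints 512 */

    bitmap = calloc(bmap_bytes, 1);
    if ( !bitmap )
        return 1;
    bitmap[5 / BITS_PER_LONG] |= 1UL << (5 % BITS_PER_LONG);
    printf("pfn 5 allocated: %d\n", page_is_allocated(5));
    free(bitmap);
    return 0;
}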


So how about renaming arch_extend_boot_allocator() to
arch_extend_allocation_bitmap() and extend_boot_allocator() to
extend_allocation_bitmap()? Below is the updated patch, please have a look.
If it is OK, I will update the whole patchset accordingly.

Change the boot allocator to support memory add.

diff -r 6ba6134f282c xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Sun Jun 28 05:27:44 2009 +0800
+++ b/xen/arch/x86/mm.c Sun Jun 28 05:28:46 2009 +0800
@@ -155,7 +155,7 @@ struct domain *dom_xen, *dom_io;

 /* Frame table and its size in pages. */
 struct page_info *__read_mostly frame_table;
-unsigned long max_page;
+unsigned long max_page, max_boot_page;
 unsigned long total_pages;

 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
@@ -207,6 +207,130 @@ void __init init_frametable(void)

     memset(frame_table, 0, nr_pages << PAGE_SHIFT);
 }
+
+#if defined(__x86_64__)
+static int boot_alloc_remapped = 0;
+static unsigned long mapped_pages = 0;
+
+#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+/*
+ * Extend the boot allocator bitmap and initialize the free/alloc state.
+ * It avoids copying the original bitmap by splitting the superpage
+ * mapping into single-page mappings.
+ * It also uses pages between start_pfn/end_pfn as page table pages.
+ */
+int arch_extend_allocation_bitmap(unsigned long start_pfn,
+                                  unsigned long end_pfn)
+{
+    unsigned long n_size, n_pages, i, s_map_pfn, e_map_pfn;
+
+    if ( (start_pfn > end_pfn) || (start_pfn < max_page) )
+        return -EINVAL;
+
+    if (!boot_alloc_remapped)
+    {
+        unsigned long map_start, map_end, map_end_pfn;
+        unsigned long offset, o_start, o_end, o_size;
+
+        o_start = virt_to_maddr(alloc_bitmap);
+
+        o_size  = max_page / 8;
+        o_size += sizeof(unsigned long);
+        o_size  = round_pgup(o_size);
+        o_end = o_start + o_size;
+
+        map_start = o_start & ~((1UL << L2_PAGETABLE_SHIFT) -1UL);
+        map_end = o_end & ~((1UL << L2_PAGETABLE_SHIFT) -1UL);
+        map_end_pfn = map_end >> PAGE_SHIFT;
+
+        map_pages_to_xen(BOOTALLOC_VIRT_START, map_start >> PAGE_SHIFT,
+                         ((map_end - map_start) >> PAGE_SHIFT) + 1,
+                         PAGE_HYPERVISOR);
+
+        mapped_pages += (map_end - map_start) >> PAGE_SHIFT;
+
+        /*
+         * The pages at the end may not be 2M aligned.
+         * Try to map them as 4k pages, and add some pages to become 2M
+         * aligned.
+         * XXX A cleaner way is to make init_boot_allocator 2M aligned; do
+         * we care about those extra pages?
+         */
+        for (i = 0; i < (1UL << PAGETABLE_ORDER); i++)
+        {
+            if (i < (( o_end >> PAGE_SHIFT ) - map_end_pfn))
+                map_pages_to_xen(BOOTALLOC_VIRT_START +
+                                    (mapped_pages << PAGE_SHIFT),
+                                 map_end_pfn + i, 1,
+                                 PAGE_HYPERVISOR);
+            else
+            {
+                struct page_info *pg;
+                pg = alloc_domheap_page(NULL, 0);
+                if (!pg)
+                    return -ENOMEM;
+                map_pages_to_xen(BOOTALLOC_VIRT_START +
+                                    (mapped_pages  << PAGE_SHIFT),
+                                 page_to_mfn(pg), 1, PAGE_HYPERVISOR);
+                /* Mark the newly created dummy page as allocated */
+                memset((void *)(BOOTALLOC_VIRT_START +
+                                (mapped_pages << PAGE_SHIFT)),
+                       0xFFU, 1UL << PAGE_SHIFT);
+            }
+            mapped_pages++;
+        }
+
+        offset = o_start & ((1UL << L2_PAGETABLE_SHIFT) - 1UL);
+
+        alloc_bitmap = (unsigned long *)(BOOTALLOC_VIRT_START + offset);
+        smp_mb();
+        boot_alloc_remapped = 1;
+    }
+
+    n_size = end_pfn / 8;
+    n_size += sizeof(unsigned long);
+    n_size = round_pgup(n_size);
+    n_pages = ((unsigned long)alloc_bitmap + n_size  - BOOTALLOC_VIRT_START) >>
+                PAGE_SHIFT;
+    n_pages += ((1UL << PAGETABLE_ORDER) - 1);
+    n_pages &= ~((1UL << PAGETABLE_ORDER) - 1);
+
+    ASSERT(!(mapped_pages & ((1UL << PAGETABLE_ORDER) - 1)));
+    if ( n_pages <= mapped_pages )
+    {
+        map_free(start_pfn, end_pfn - start_pfn + 1);
+        return start_pfn;
+    }
+
+    s_map_pfn = start_pfn + (1UL << PAGETABLE_ORDER) - 1;
+    s_map_pfn &= ~((1UL << PAGETABLE_ORDER) - 1);
+    e_map_pfn = s_map_pfn;
+    for ( i = mapped_pages; i < n_pages; i += (1UL << PAGETABLE_ORDER) )
+    {
+        map_pages_to_xen(BOOTALLOC_VIRT_START + (i << PAGE_SHIFT),
+                         s_map_pfn + (i - mapped_pages),
+                         1UL << PAGETABLE_ORDER,
+                         PAGE_HYPERVISOR);
+        e_map_pfn += 1UL << PAGETABLE_ORDER;
+    }
+
+    /* All newly allocated pages are marked as allocated initially */
+    memset((unsigned char *)(BOOTALLOC_VIRT_START +
+                     (mapped_pages << PAGE_SHIFT)),
+            0xFFU, (n_pages - mapped_pages) << PAGE_SHIFT);
+    mapped_pages = n_pages;
+    map_free(start_pfn, s_map_pfn - start_pfn);
+    map_free(e_map_pfn, end_pfn - e_map_pfn + 1);
+
+    return start_pfn;
+}
+#else
+int arch_extend_allocation_bitmap(unsigned long start_pfn,
+                                  unsigned long end_pfn)
+{
+    map_free(start_pfn, end_pfn - start_pfn + 1);
+    return start_pfn;
+}
+#endif

 void __init arch_init_memory(void)
 {
@@ -4402,8 +4526,6 @@ int ptwr_do_page_fault(struct vcpu *v, u

 void free_xen_pagetable(void *v)
 {
-    extern int early_boot;
-
     if ( early_boot )
         return;

diff -r 6ba6134f282c xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Sun Jun 28 05:27:44 2009 +0800
+++ b/xen/arch/x86/setup.c      Sun Jun 28 05:27:48 2009 +0800
@@ -765,6 +765,9 @@ void __init __start_xen(unsigned long mb
     reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end);

     /* Initialise boot heap. */
+#if defined(CONFIG_X86_32) && defined(CONFIG_MEMORY_HOTPLUG)
+    max_boot_page = 16UL << (30 - PAGE_SHIFT);
+#endif
     allocator_bitmap_end = init_boot_allocator(__pa(&_end));
 #if defined(CONFIG_X86_32)
     xenheap_initial_phys_start = allocator_bitmap_end;
diff -r 6ba6134f282c xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Sun Jun 28 05:27:44 2009 +0800
+++ b/xen/arch/x86/x86_32/mm.c  Sun Jun 28 05:27:48 2009 +0800
@@ -42,7 +42,6 @@ static unsigned long mpt_size;

 void *alloc_xen_pagetable(void)
 {
-    extern int early_boot;
     extern unsigned long xenheap_initial_phys_start;
     unsigned long mfn;

diff -r 6ba6134f282c xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Sun Jun 28 05:27:44 2009 +0800
+++ b/xen/arch/x86/x86_64/mm.c  Sun Jun 28 05:27:48 2009 +0800
@@ -57,7 +57,6 @@ l2_pgentry_t __attribute__ ((__section__

 void *alloc_xen_pagetable(void)
 {
-    extern int early_boot;
     unsigned long mfn;

     if ( !early_boot )
diff -r 6ba6134f282c xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Sun Jun 28 05:27:44 2009 +0800
+++ b/xen/common/page_alloc.c   Sun Jun 28 07:30:10 2009 +0800
@@ -103,7 +103,7 @@ unsigned long *alloc_bitmap;
  *  *_off == Bit offset within an element of the `alloc_bitmap' array.
  */

-static void map_alloc(unsigned long first_page, unsigned long nr_pages)
+void map_alloc(unsigned long first_page, unsigned long nr_pages)
 {
     unsigned long start_off, end_off, curr_idx, end_idx;

@@ -131,7 +131,7 @@ static void map_alloc(unsigned long firs
     }
 }

-static void map_free(unsigned long first_page, unsigned long nr_pages)
+void map_free(unsigned long first_page, unsigned long nr_pages)
 {
     unsigned long start_off, end_off, curr_idx, end_idx;

@@ -178,15 +178,33 @@ paddr_t __init init_boot_allocator(paddr
      * Allocate space for the allocation bitmap. Include an extra longword
      * of padding for possible overrun in map_alloc and map_free.
      */
-    bitmap_size  = max_page / 8;
+    bitmap_size  = (max_boot_page ? max_boot_page : max_page) / 8;
     bitmap_size += sizeof(unsigned long);
     bitmap_size  = round_pgup(bitmap_size);
     alloc_bitmap = (unsigned long *)maddr_to_virt(bitmap_start);

     /* All allocated by default. */
     memset(alloc_bitmap, ~0, bitmap_size);
+    max_boot_page = max_page;

     return bitmap_start + bitmap_size;
+}
+
+int extend_allocation_bitmap(unsigned long spfn, unsigned long epfn)
+{
+    int rc;
+
+    if (epfn < max_page)
+        return 0;
+
+    rc = arch_extend_allocation_bitmap(spfn, epfn);
+
+    if (rc < 0)
+        return rc;
+
+    max_boot_page = epfn;
+
+    return 0;
 }

 void __init init_boot_pages(paddr_t ps, paddr_t pe)
@@ -235,15 +253,22 @@ void __init init_boot_pages(paddr_t ps,
     }
 }

-unsigned long __init alloc_boot_pages(
-    unsigned long nr_pfns, unsigned long pfn_align)
+/*
+ * Allocate nr_pfns pages with "align" alignment in the range start ~ end.
+ */
+unsigned long alloc_in_allocation_bitmap(unsigned long start, unsigned long end,
+                                    unsigned long nr_pfns, unsigned long align)
 {
     unsigned long pg, i;
-
-    /* Search backwards to obtain highest available range. */
-    for ( pg = (max_page - nr_pfns) & ~(pfn_align - 1);
-          pg >= first_valid_mfn;
-          pg = (pg + i - nr_pfns) & ~(pfn_align - 1) )
+    if ( start < first_valid_mfn )
+        start = first_valid_mfn;
+
+    if ( end > max_boot_page )
+        end = max_boot_page;
+
+    for ( pg = (end - nr_pfns) & ~(align - 1);
+          pg >= start;
+          pg = (pg + i - nr_pfns) & ~(align - 1) )
     {
         for ( i = 0; i < nr_pfns; i++ )
             if ( allocated_in_map(pg+i) )
@@ -258,6 +283,11 @@ unsigned long __init alloc_boot_pages(
     return 0;
 }

+unsigned long __init alloc_boot_pages(
+    unsigned long nr_pfns, unsigned long pfn_align)
+{
+    return alloc_in_allocation_bitmap(first_valid_mfn, max_page, nr_pfns, pfn_align);
+}


 /*************************
@@ -834,7 +864,7 @@ int query_page_offline(unsigned long mfn
  * latter is not on a MAX_ORDER boundary, then we reserve the page by
  * not freeing it to the buddy allocator.
  */
-static void init_heap_pages(
+void init_heap_pages(
     struct page_info *pg, unsigned long nr_pages)
 {
     unsigned int nid_curr, nid_prev;
@@ -894,15 +924,15 @@ static unsigned long avail_heap_pages(
 }

 #define avail_for_domheap(mfn) !(allocated_in_map(mfn) || is_xen_heap_mfn(mfn))
-void __init end_boot_allocator(void)
+void transfer_pages_to_heap(unsigned long spfn, unsigned long epfn)
 {
     unsigned long i, nr = 0;
     int curr_free, next_free;

     /* Pages that are free now go to the domain sub-allocator. */
-    if ( (curr_free = next_free = avail_for_domheap(first_valid_mfn)) )
-        map_alloc(first_valid_mfn, 1);
-    for ( i = first_valid_mfn; i < max_page; i++ )
+    if ( (curr_free = next_free = avail_for_domheap(spfn)) )
+        map_alloc(spfn, 1);
+    for ( i = spfn; i < epfn; i++ )
     {
         curr_free = next_free;
         next_free = avail_for_domheap(i+1);
@@ -918,6 +948,11 @@ void __init end_boot_allocator(void)
     }
     if ( nr )
         init_heap_pages(mfn_to_page(i - nr), nr);
+}
+
+void __init end_boot_allocator(void)
+{
+    transfer_pages_to_heap(first_valid_mfn, max_page);

     if ( !dma_bitsize && (num_online_nodes() > 1) )
     {
@@ -936,6 +971,7 @@ void __init end_boot_allocator(void)
         printk(" DMA width %u bits", dma_bitsize);
     printk("\n");
 }
+
 #undef avail_for_domheap

 /*
diff -r 6ba6134f282c xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Sun Jun 28 05:27:44 2009 +0800
+++ b/xen/include/asm-ia64/mm.h Sun Jun 28 07:35:17 2009 +0800
@@ -274,6 +274,12 @@ static inline int get_page_and_type(stru
     }

     return rc;
+}
+
+static inline int arch_extend_allocation_bitmap(unsigned long start_pfn,
+                                                unsigned long end_pfn)
+{
+    return -ENOSYS;
 }

 #define        set_machinetophys(_mfn, _pfn) do { } while(0);
diff -r 6ba6134f282c xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h      Sun Jun 28 05:27:44 2009 +0800
+++ b/xen/include/asm-x86/config.h      Sun Jun 28 05:27:48 2009 +0800
@@ -162,7 +162,9 @@ extern unsigned int video_mode, video_fl
  *    High read-only compatibility machine-to-phys translation table.
  *  0xffff828c80000000 - 0xffff828cbfffffff [1GB,   2^30 bytes, PML4:261]
  *    Xen text, static data, bss.
- *  0xffff828cc0000000 - 0xffff82ffffffffff [461GB,             PML4:261]
+ *  0xffff828cc0000000 - 0xffff828cffffffff [1GB,   2^30 bytes, PML4:261]
+ *    Boot allocator bitmap
+ *  0xffff828d00000000 - 0xffff82ffffffffff [460GB,             PML4:261]
  *    Reserved for future use.
  *  0xffff830000000000 - 0xffff83ffffffffff [1TB,   2^40 bytes, PML4:262-263]
  *    1:1 direct mapping of all physical memory.
@@ -230,6 +232,9 @@ extern unsigned int video_mode, video_fl
 /* Slot 261: xen text, static data and bss (1GB). */
 #define XEN_VIRT_START          (HIRO_COMPAT_MPT_VIRT_END)
 #define XEN_VIRT_END            (XEN_VIRT_START + (1UL << 30))
+/* Slot 261: Boot allocator bitmap (1GB) */
+#define BOOTALLOC_VIRT_START        XEN_VIRT_END
+#define BOOTALLOC_VIRT_END          (BOOTALLOC_VIRT_START + (1UL << 30))
 /* Slot 262-263: A direct 1:1 mapping of all of physical memory. */
 #define DIRECTMAP_VIRT_START    (PML4_ADDR(262))
 #define DIRECTMAP_VIRT_END      (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2)
diff -r 6ba6134f282c xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Sun Jun 28 05:27:44 2009 +0800
+++ b/xen/include/asm-x86/mm.h  Sun Jun 28 05:32:54 2009 +0800
@@ -266,9 +266,13 @@ extern void share_xen_page_with_privileg
     struct page_info *page, int readonly);

 extern struct page_info *frame_table;
-extern unsigned long max_page;
+extern unsigned long max_page, max_boot_page;
 extern unsigned long total_pages;
+
+extern int early_boot;
 void init_frametable(void);
+int arch_extend_allocation_bitmap(unsigned long start_pfn,
+                               unsigned long end_pfn);

 int free_page_type(struct page_info *page, unsigned long type,
                    int preemptible);
diff -r 6ba6134f282c xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Sun Jun 28 05:27:44 2009 +0800
+++ b/xen/include/xen/mm.h      Sun Jun 28 07:31:43 2009 +0800
@@ -38,10 +38,18 @@ struct page_info;

 /* Boot-time allocator. Turns into generic allocator after bootstrap. */
 paddr_t init_boot_allocator(paddr_t bitmap_start);
+void map_free(unsigned long first_page, unsigned long nr_pages);
+void map_alloc(unsigned long first_page, unsigned long nr_pages);
 void init_boot_pages(paddr_t ps, paddr_t pe);
 unsigned long alloc_boot_pages(
     unsigned long nr_pfns, unsigned long pfn_align);
+unsigned long alloc_in_allocation_bitmap(unsigned long start, unsigned long end,
+                               unsigned long nr_pfns, unsigned long align);
+void init_heap_pages(
+    struct page_info *pg, unsigned long nr_pages);
 void end_boot_allocator(void);
+void transfer_pages_to_heap(unsigned long spfn, unsigned long epfn);
+int extend_allocation_bitmap(unsigned long spfn, unsigned long epfn);

 /* Xen suballocator. These functions are interrupt-safe. */
 void init_xenheap_pages(paddr_t ps, paddr_t pe);
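
For clarity, this is roughly how I expect a memory hot-add path to use the
renamed interfaces. The function below is only an illustrative sketch
(memory_add_example and the frame-table step are placeholders, not part of
this patch):

/*
 * Illustrative only: a hypothetical hot-add path using the new
 * interfaces.  extend_allocation_bitmap() grows the bitmap to cover
 * the new range and marks it free (minus the pages consumed for the
 * bitmap itself and for page tables); transfer_pages_to_heap() then
 * hands the free pages to the domain sub-allocator.
 */
int memory_add_example(unsigned long spfn, unsigned long epfn)
{
    int rc;

    rc = extend_allocation_bitmap(spfn, epfn);
    if ( rc < 0 )
        return rc;

    /* ... extend frame_table and M2P entries for the new range here ... */

    transfer_pages_to_heap(spfn, epfn);
    return 0;
}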


Thanks
Yunhong Jiang

>
> -- Keir
>
>
>

Attachment: boot_allocator.patch
Description: boot_allocator.patch
