
Re: [Xen-devel] Load increase after memory upgrade (part2)



Konrad, against which kernel version did you produce this patch? It will not 
apply to 3.4.2 at least; I will look for some older version now...

-----Original Message-----
From: xen-devel-bounces@xxxxxxxxxxxxx [mailto:xen-devel-bounces@xxxxxxxxxxxxx] 
On Behalf Of Konrad Rzeszutek Wilk
Sent: Wednesday, 13 June 2012 18:55
To: Carsten Schiers
Cc: Konrad Rzeszutek Wilk; xen-devel; Jan Beulich; Sander Eikelenboom
Subject: Re: [Xen-devel] Load increase after memory upgrade (part2)

On Fri, May 11, 2012 at 03:41:38PM -0400, Konrad Rzeszutek Wilk wrote:
> On Fri, May 11, 2012 at 11:39:08AM +0200, Carsten Schiers wrote:
> > Hi Konrad,
> > 
> >  
> > I don't want to be pushy, as I have no real issue. I simply use the Xenified 
> > kernel or take the double load. 
> > 
> > But I think this mystery is still open. My last status was that the 
> > latest patch you produced resulted in a BUG,
> 
> Yes, that is right. Thank you for reminding me.
> > 
> > so we still have not checked whether our theory is correct.
> 
> No, we haven't. And I should have no trouble reproducing this. I can 
> just write a tiny module that allocates with vmalloc_32().

Done. Found some bugs, and here is a new version. Can you please try it out? It 
has #define DEBUG 1 set, so it should print a lot of debug output when the DVB 
module loads. If it crashes, please send me the full log.

Thanks.
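
As an aside, for anyone who wants to reproduce this independently: a tiny test
module that allocates through vmalloc_32() could look roughly like the sketch
below. It is only an illustration; the module name, allocation size, and
messages are made-up assumptions, not the module actually used here.

/* vmalloc32_test.c - minimal sketch of a module that allocates via vmalloc_32() */
#include <linux/module.h>
#include <linux/vmalloc.h>

static void *buf;

static int __init vmalloc32_test_init(void)
{
        /* vmalloc_32() requests memory mappable below 4GB, as the DVB drivers do. */
        buf = vmalloc_32(64 * 4096);    /* 64 pages on x86 */
        if (!buf)
                return -ENOMEM;
        pr_info("vmalloc32_test: allocated 64 pages at %p\n", buf);
        return 0;
}

static void __exit vmalloc32_test_exit(void)
{
        vfree(buf);
}

module_init(vmalloc32_test_init);
module_exit(vmalloc32_test_exit);
MODULE_LICENSE("GPL");

Built as an out-of-tree module (obj-m) and loaded with insmod in a PV guest, it
should hit the new DMA32 path in __vmalloc_area_node and trigger the DEBUG
printks in the patch below.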
From 5afb4ab1fb3d2b059fe1a6db93ab65cb76f43b8a Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Date: Thu, 31 May 2012 14:21:04 -0400
Subject: [PATCH] xen/vmalloc_32: Use xen_exchange_.. when GFP flags are DMA.
 [v3]

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
---
 arch/x86/xen/mmu.c    |  187 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/xen/xen-ops.h |    2 +
 mm/vmalloc.c          |   18 +++++-
 3 files changed, 202 insertions(+), 5 deletions(-)

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3a73785..960d206 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -47,6 +47,7 @@
 #include <linux/gfp.h>
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 
 #include <trace/events/xen.h>
 
@@ -2051,6 +2052,7 @@ void __init xen_init_mmu_ops(void)
 /* Protected by xen_reservation_lock. */
 #define MAX_CONTIG_ORDER 9 /* 2MB */
 static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+static unsigned long limited_frames[1<<MAX_CONTIG_ORDER];
 
 #define VOID_PTE (mfn_pte(0, __pgprot(0)))
 static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
@@ -2075,6 +2077,42 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
        }
        xen_mc_issue(0);
 }
+static int xen_zap_page_range(struct page *pages, unsigned int order,
+                               unsigned long *in_frames,
+                               unsigned long *out_frames,
+                               void *limit_bitmap)
+{
+       int i, n = 0;
+       struct multicall_space mcs;
+       struct page *page;
+
+       xen_mc_batch();
+       for (i = 0; i < (1UL<<order); i++) {
+               if (!test_bit(i, limit_bitmap))
+                       continue;
+
+               page = &pages[i];
+               mcs = __xen_mc_entry(0);
+#define DEBUG 1
+               if (in_frames) {
+#ifdef DEBUG
+                       printk(KERN_INFO "%s:%d 0x%lx(pfn) 0x%lx (mfn) 0x%lx(vaddr)\n",
+                               __func__, i, page_to_pfn(page),
+                               pfn_to_mfn(page_to_pfn(page)), page_address(page));
+#endif
+                       in_frames[i] = pfn_to_mfn(page_to_pfn(page));
+               }
+               MULTI_update_va_mapping(mcs.mc, (unsigned long)page_address(page), VOID_PTE, 0);
+               set_phys_to_machine(page_to_pfn(page), INVALID_P2M_ENTRY);
+
+               if (out_frames)
+                       out_frames[i] = page_to_pfn(page);
+               ++n;
+
+       }
+       xen_mc_issue(0);
+       return n;
+}
 
 /*
  * Update the pfn-to-mfn mappings for a virtual address range, either to
@@ -2118,6 +2156,53 @@ static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
 
        xen_mc_issue(0);
 }
+static void xen_remap_exchanged_pages(struct page *pages, int order,
+                                    unsigned long *mfns,
+                                    unsigned long first_mfn, /* in_frame if we failed */
+                                    void *limit_map)
+{
+       unsigned i, limit;
+       unsigned long mfn;
+       struct page *page;
+
+       xen_mc_batch();
+
+       limit = 1ULL << order;
+       for (i = 0; i < limit; i++) {
+               struct multicall_space mcs;
+               unsigned flags;
+
+               if (!test_bit(i, limit_map))
+                       continue;
+
+               page = &pages[i];
+               mcs = __xen_mc_entry(0);
+               if (mfns)
+                       mfn = mfns[i];
+               else
+                       mfn = first_mfn + i;
+
+               if (i < (limit - 1))
+                       flags = 0;
+               else {
+                       if (order == 0)
+                               flags = UVMF_INVLPG | UVMF_ALL;
+                       else
+                               flags = UVMF_TLB_FLUSH | UVMF_ALL;
+               }
+#ifdef DEBUG
+               printk(KERN_INFO "%s (%d) pfn:0x%lx, pfn: 0x%lx vaddr: 0x%lx\n",
+                       __func__, i, page_to_pfn(page), mfn, page_address(page));
+#endif
+               MULTI_update_va_mapping(mcs.mc, (unsigned long)page_address(page),
+                               mfn_pte(mfn, PAGE_KERNEL), flags);
+
+               set_phys_to_machine(page_to_pfn(page), mfn);
+       }
+
+       xen_mc_issue(0);
+}
+
 
 /*
  * Perform the hypercall to exchange a region of our pfns to point to
@@ -2136,7 +2221,9 @@ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
 {
        long rc;
        int success;
-
+#ifdef DEBUG
+       int i;
+#endif
        struct xen_memory_exchange exchange = {
                .in = {
                        .nr_extents   = extents_in,
@@ -2157,7 +2244,11 @@ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
 
        rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
        success = (exchange.nr_exchanged == extents_in);
-
+#ifdef DEBUG
+       for (i = 0; i <  exchange.nr_exchanged; i++) {
+               printk(KERN_INFO "%s 0x%lx (mfn) <-> 0x%lx (mfn)\n",
+                       __func__, pfns_in[i], mfns_out[i]);
+       }
+#endif
        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
        BUG_ON(success && (rc != 0));
 
@@ -2231,8 +2322,8 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
        xen_zap_pfn_range(vstart, order, NULL, out_frames);
 
        /* 3. Do the exchange for non-contiguous MFNs. */
-       success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
-                                       0, out_frames, 0);
+       success = xen_exchange_memory(1, order, &in_frame,
+                                     1UL << order, 0, out_frames, 0);
 
        /* 4. Map new pages in place of old pages. */
        if (success)
@@ -2244,6 +2335,94 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 }
 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+                              unsigned int address_bits)
+{
+       unsigned long *in_frames = discontig_frames, *out_frames = limited_frames;
+       unsigned long  flags;
+       struct page *page;
+       int success;
+       int i, n = 0;
+       unsigned long _limit_map;
+       unsigned long *limit_map;
+
+       if (xen_feature(XENFEAT_auto_translated_physmap))
+               return 0;
+
+       if (unlikely(order > MAX_CONTIG_ORDER))
+               return -ENOMEM;
+
+       if (BITS_PER_LONG >> order) {
+               limit_map = kzalloc(BITS_TO_LONGS(1U << order) *
+                                   sizeof(*limit_map), GFP_KERNEL);
+               if (unlikely(!limit_map))
+                       return -ENOMEM;
+       } else
+               limit_map = &_limit_map;
+
+       /* 0. Construct our per page bitmap lookup. */
+
+       if (address_bits && (address_bits < PAGE_SHIFT))
+                       return -EINVAL;
+
+       if (order)
+               bitmap_zero(limit_map, 1U << order);
+       else
+               __set_bit(0, limit_map);
+
+       /* 1. Clear the pages */
+       for (i = 0; i < (1ULL << order); i++) {
+               void *vaddr;
+               page = &pages[i];
+
+               vaddr = page_address(page);
+#ifdef DEBUG
+               printk(KERN_INFO "%s: page: %p vaddr: %p 0x%lx(mfn) 0x%lx(pfn)\n",
+                       __func__, page, vaddr, virt_to_mfn(vaddr), mfn_to_pfn(virt_to_mfn(vaddr)));
+#endif
+               if (address_bits) {
+                       if (!(virt_to_mfn(vaddr) >> (address_bits - PAGE_SHIFT)))
+                               continue;
+                       __set_bit(i, limit_map);
+               }
+               if (!PageHighMem(page))
+                       memset(vaddr, 0, PAGE_SIZE);
+               else {
+                       memset(kmap(page), 0, PAGE_SIZE);
+                       kunmap(page);
+                       ++n;
+               }
+       }
+       /* Check to see if we actually have to do any work. */
+       if (bitmap_empty(limit_map, 1U << order)) {
+               if (limit_map != &_limit_map)
+                       kfree(limit_map);
+               return 0;
+       }
+       if (n)
+               kmap_flush_unused();
+
+       spin_lock_irqsave(&xen_reservation_lock, flags);
+
+       /* 2. Zap current PTEs. */
+       n = xen_zap_page_range(pages, order, in_frames, NULL /* out_frames */, limit_map);
+
+       /* 3. Do the exchange for non-contiguous MFNs. */
+       success = xen_exchange_memory(n, 0 /* this is always called per page */, in_frames,
+                                     n, 0, out_frames, address_bits);
+
+       /* 4. Map new pages in place of old pages. */
+       if (success)
+               xen_remap_exchanged_pages(pages, order, out_frames, 0, limit_map);
+       else
+               xen_remap_exchanged_pages(pages, order, NULL, *in_frames, limit_map);
+
+       spin_unlock_irqrestore(&xen_reservation_lock, flags);
+       if (limit_map != &_limit_map)
+               kfree(limit_map);
+
+       return success ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(xen_limit_pages_to_max_mfn);
 #ifdef CONFIG_XEN_PVHVM
 static void xen_hvm_exit_mmap(struct mm_struct *mm)
 {
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 6a198e4..2f8709f 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -29,4 +29,6 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
                               unsigned long mfn, int nr,
                               pgprot_t prot, unsigned domid);
 
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+                              unsigned int address_bits);
 #endif /* INCLUDE_XEN_OPS_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2aad499..194af07 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,6 +31,8 @@
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
 /*** Page table manipulation functions ***/
 
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
@@ -1576,7 +1578,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
        struct page **pages;
        unsigned int nr_pages, array_size, i;
        gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
-
+       gfp_t dma_mask = gfp_mask & (__GFP_DMA | __GFP_DMA32);
+       if (xen_pv_domain()) {
+               if (dma_mask == (__GFP_DMA | __GFP_DMA32))
+                       gfp_mask &= ~(__GFP_DMA | __GFP_DMA32);
+       }
        nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
        array_size = (nr_pages * sizeof(struct page *));
 
@@ -1612,6 +1618,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
                        goto fail;
                }
                area->pages[i] = page;
+               if (xen_pv_domain()) {
+                       if (dma_mask) {
+                               if (xen_limit_pages_to_max_mfn(page, 0, 32)) {
+                                       area->nr_pages = i + 1;
+                                       goto fail;
+                               }
+                       if (gfp_mask & __GFP_ZERO)
+                               clear_highpage(page);
+                       }
+               }
        }
 
        if (map_vm_area(area, prot, &pages))
--
1.7.7.6


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel





 

