[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v7 2/3] mm: make pages allocated with MEMF_no_refcount safe to assign



Currently it is unsafe to assign a domheap page allocated with
MEMF_no_refcount to a domain because the domain't 'tot_pages' will not
be incremented, but will be decrement when the page is freed (since
free_domheap_pages() has no way of telling that the increment was skipped).

This patch allocates a new 'count_info' bit for a PGC_extra flag
which is then used to mark pages when alloc_domheap_pages() is called
with MEMF_no_refcount. The MEMF_no_refcount is *not* passed through to
assign_pages() because it still needs to call domain_adjust_tot_pages() to
make sure the domain is appropriately referenced. assign_pages() is
accordingly modified to account pages marked with PGC_extra to an
'extra_pages' counter, which is then subtracted from 'tot_pages' before it
is checked against 'max_pages', thus avoiding over-allocation errors.

NOTE: steal_page() is also modified to decrement extra_pages in the case of
      a PGC_extra page being stolen from a domain.
      Also, whilst adding the extra_pages counter into struct domain, make
      some cosmetic fixes to comments for neighbouring fields.

Signed-off-by: Paul Durrant <pdurrant@xxxxxxxxxx>
---
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: George Dunlap <George.Dunlap@xxxxxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Julien Grall <julien@xxxxxxx>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx>
Cc: Wei Liu <wl@xxxxxxx>
Cc: Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>
Cc: "Roger Pau Monné" <roger.pau@xxxxxxxxxx>

v7:
 - s/PGC_no_refcount/PGC_extra/g
 - Re-work allocation to account for 'extra' pages, also making it
   safe to assign PGC_extra pages post-allocation

v6:
 - Add an extra ASSERT into assign_pages() that PGC_no_refcount is not
   set if MEMF_no_refcount is clear
 - ASSERT that count_info is 0 in alloc_domheap_pages() and set to
   PGC_no_refcount rather than ORing

v5:
 - Make sure PGC_no_refcount is set before assign_pages() is called
 - Don't bother to clear PGC_no_refcount in free_domheap_pages() and
   drop ASSERT in free_heap_pages()
 - Don't latch count_info in free_heap_pages()

v4:
 - New in v4
---
 xen/arch/x86/mm.c        |  5 ++++
 xen/common/page_alloc.c  | 49 +++++++++++++++++++++++++++++-----------
 xen/include/asm-arm/mm.h |  5 +++-
 xen/include/asm-x86/mm.h |  7 ++++--
 xen/include/xen/sched.h  | 18 ++++++++-------
 5 files changed, 60 insertions(+), 24 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index f50c065af3..5b04db8c21 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4266,6 +4266,11 @@ int steal_page(
     page_list_del(page, &d->page_list);
 
     /* Unlink from original owner. */
+    if ( page->count_info & PGC_extra )
+    {
+        ASSERT(d->extra_pages);
+        d->extra_pages--;
+    }
     if ( !(memflags & MEMF_no_refcount) && !domain_adjust_tot_pages(d, -1) )
         drop_dom_ref = true;
 
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 919a270587..a2d69f222a 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -2256,6 +2256,7 @@ int assign_pages(
 {
     int rc = 0;
     unsigned long i;
+    unsigned int extra_pages = 0;
 
     spin_lock(&d->page_alloc_lock);
 
@@ -2267,13 +2268,19 @@ int assign_pages(
         goto out;
     }
 
+    for ( i = 0; i < (1 << order); i++ )
+        if ( pg[i].count_info & PGC_extra )
+            extra_pages++;
+
     if ( !(memflags & MEMF_no_refcount) )
     {
-        if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
+        unsigned int max_pages = d->max_pages - d->extra_pages - extra_pages;
+
+        if ( unlikely((d->tot_pages + (1 << order)) > max_pages) )
         {
             gprintk(XENLOG_INFO, "Over-allocation for domain %u: "
                     "%u > %u\n", d->domain_id,
-                    d->tot_pages + (1 << order), d->max_pages);
+                    d->tot_pages + (1 << order), max_pages);
             rc = -E2BIG;
             goto out;
         }
@@ -2282,13 +2289,17 @@ int assign_pages(
             get_knownalive_domain(d);
     }
 
+    d->extra_pages += extra_pages;
     for ( i = 0; i < (1 << order); i++ )
     {
+        unsigned long count_info = pg[i].count_info;
+
         ASSERT(page_get_owner(&pg[i]) == NULL);
-        ASSERT(!pg[i].count_info);
+        ASSERT(!(count_info & ~PGC_extra));
         page_set_owner(&pg[i], d);
         smp_wmb(); /* Domain pointer must be visible before updating refcnt. */
-        pg[i].count_info = PGC_allocated | 1;
+        count_info &= PGC_extra;
+        pg[i].count_info = count_info | PGC_allocated | 1;
         page_list_add_tail(&pg[i], &d->page_list);
     }
 
@@ -2314,11 +2325,6 @@ struct page_info *alloc_domheap_pages(
 
     if ( memflags & MEMF_no_owner )
         memflags |= MEMF_no_refcount;
-    else if ( (memflags & MEMF_no_refcount) && d )
-    {
-        ASSERT(!(memflags & MEMF_no_refcount));
-        return NULL;
-    }
 
     if ( !dma_bitsize )
         memflags &= ~MEMF_no_dma;
@@ -2331,11 +2337,23 @@ struct page_info *alloc_domheap_pages(
                                   memflags, d)) == NULL)) )
          return NULL;
 
-    if ( d && !(memflags & MEMF_no_owner) &&
-         assign_pages(d, pg, order, memflags) )
+    if ( d && !(memflags & MEMF_no_owner) )
     {
-        free_heap_pages(pg, order, memflags & MEMF_no_scrub);
-        return NULL;
+        if ( memflags & MEMF_no_refcount )
+        {
+            unsigned long i;
+
+            for ( i = 0; i < (1ul << order); i++ )
+            {
+                ASSERT(!pg[i].count_info);
+                pg[i].count_info = PGC_extra;
+            }
+        }
+        if ( assign_pages(d, pg, order, memflags & ~MEMF_no_refcount) )
+        {
+            free_heap_pages(pg, order, memflags & MEMF_no_scrub);
+            return NULL;
+        }
     }
 
     return pg;
@@ -2383,6 +2401,11 @@ void free_domheap_pages(struct page_info *pg, unsigned 
int order)
                     BUG();
                 }
                 arch_free_heap_page(d, &pg[i]);
+                if ( pg[i].count_info & PGC_extra )
+                {
+                    ASSERT(d->extra_pages);
+                    d->extra_pages--;
+                }
             }
 
             drop_dom_ref = !domain_adjust_tot_pages(d, -(1 << order));
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 333efd3a60..7df91280bc 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -119,9 +119,12 @@ struct page_info
 #define PGC_state_offlined PG_mask(2, 9)
 #define PGC_state_free    PG_mask(3, 9)
 #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
+/* Page is not reference counted */
+#define _PGC_extra        PG_shift(10)
+#define PGC_extra         PG_mask(1, 10)
 
 /* Count of references to this frame. */
-#define PGC_count_width   PG_shift(9)
+#define PGC_count_width   PG_shift(10)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
 /*
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 2ca8882ad0..06d64d494d 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -77,9 +77,12 @@
 #define PGC_state_offlined PG_mask(2, 9)
 #define PGC_state_free    PG_mask(3, 9)
 #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
+/* Page is not reference counted */
+#define _PGC_extra        PG_shift(10)
+#define PGC_extra         PG_mask(1, 10)
 
- /* Count of references to this frame. */
-#define PGC_count_width   PG_shift(9)
+/* Count of references to this frame. */
+#define PGC_count_width   PG_shift(10)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
 /*
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 7c5c437247..763fcd56a4 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -361,15 +361,17 @@ struct domain
 
     spinlock_t       domain_lock;
 
-    spinlock_t       page_alloc_lock; /* protects all the following fields  */
-    struct page_list_head page_list;  /* linked list */
+    spinlock_t       page_alloc_lock;   /* protects all the following fields */
+    struct page_list_head page_list;    /* linked list */
     struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
-    unsigned int     tot_pages;       /* number of pages currently possesed */
-    unsigned int     xenheap_pages;   /* # pages allocated from Xen heap    */
-    unsigned int     outstanding_pages; /* pages claimed but not possessed  */
-    unsigned int     max_pages;       /* maximum value for tot_pages        */
-    atomic_t         shr_pages;       /* number of shared pages             */
-    atomic_t         paged_pages;     /* number of paged-out pages          */
+    unsigned int     tot_pages;         /* number of pages currently possesed 
*/
+    unsigned int     xenheap_pages;     /* number of pages from Xen heap */
+    unsigned int     outstanding_pages; /* pages claimed but not possessed */
+    unsigned int     extra_pages;       /* extra pages not limited by 
max_pages */
+    unsigned int     max_pages;         /* maximum value for tot_pages minus */
+                                        /* extra_pages */
+    atomic_t         shr_pages;         /* number of shared pages */
+    atomic_t         paged_pages;       /* number of paged-out pages */
 
     /* Scheduling. */
     void            *sched_priv;    /* scheduler-specific data */
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.