
[Xen-devel] [PATCH v5 1/8] mm: Place unscrubbed pages at the end of pagelist



... so that it's easy to find pages that need to be scrubbed (those pages are
now marked with the _PGC_need_scrub bit).

We keep track of the first unscrubbed page in a buddy using the first_dirty
field. For now it can take only two values: 0 (the whole buddy needs scrubbing)
or INVALID_DIRTY_IDX (the buddy does not need to be scrubbed). Subsequent
patches will allow scrubbing to be interrupted, so first_dirty will then be
able to take any value.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
Changes in v5:
* Be more careful when returning unused portion of a buddy to the heap in
  alloc_heap_pages() and don't set first_dirty if we know that the sub-buddy
  is clean
* In reserve_offlined_page(), don't try to find dirty pages in sub-buddies if we
  can figure out that there is none.
* Drop unnecessary setting of first_dirty in free_heap_pages()
* Switch to using bitfields in page_info.u.free

I kept node_need_scrub[] as a global array rather than making it per-node.
I think splitting it should be part of making heap_lock a per-node lock,
together with increasing scrub concurrency by having more than one CPU
scrub a node.



 xen/common/page_alloc.c  | 190 +++++++++++++++++++++++++++++++++++++++--------
 xen/include/asm-arm/mm.h |  18 ++++-
 xen/include/asm-x86/mm.h |  17 ++++-
 3 files changed, 190 insertions(+), 35 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 8bcef6a..570d1f7 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -383,6 +383,8 @@ typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
 #define heap(node, zone, order) ((*_heap[node])[zone][order])
 
+static unsigned long node_need_scrub[MAX_NUMNODES];
+
 static unsigned long *avail[MAX_NUMNODES];
 static long total_avail_pages;
 
@@ -678,6 +680,20 @@ static void check_low_mem_virq(void)
     }
 }
 
+/* Pages that need a scrub are added to tail, otherwise to head. */
+static void page_list_add_scrub(struct page_info *pg, unsigned int node,
+                                unsigned int zone, unsigned int order,
+                                unsigned int first_dirty)
+{
+    PFN_ORDER(pg) = order;
+    pg->u.free.first_dirty = first_dirty;
+
+    if ( first_dirty != INVALID_DIRTY_IDX )
+        page_list_add_tail(pg, &heap(node, zone, order));
+    else
+        page_list_add(pg, &heap(node, zone, order));
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi,
@@ -687,7 +703,7 @@ static struct page_info *alloc_heap_pages(
     unsigned int i, j, zone = 0, nodemask_retry = 0;
     nodeid_t first_node, node = MEMF_get_node(memflags), req_node = node;
     unsigned long request = 1UL << order;
-    struct page_info *pg;
+    struct page_info *pg, *first_dirty_pg = NULL;
     nodemask_t nodemask = (d != NULL ) ? d->node_affinity : node_online_map;
     bool_t need_tlbflush = 0;
     uint32_t tlbflush_timestamp = 0;
@@ -798,11 +814,26 @@ static struct page_info *alloc_heap_pages(
     return NULL;
 
  found: 
+
+    if ( pg->u.free.first_dirty != INVALID_DIRTY_IDX )
+        first_dirty_pg = pg + pg->u.free.first_dirty;
+
     /* We may have to halve the chunk a number of times. */
     while ( j != order )
     {
-        PFN_ORDER(pg) = --j;
-        page_list_add_tail(pg, &heap(node, zone, j));
+        unsigned int first_dirty;
+
+        if ( first_dirty_pg && ((pg + (1 << j)) > first_dirty_pg) )
+        {
+            if ( pg < first_dirty_pg )
+                first_dirty = first_dirty_pg - pg;
+            else
+                first_dirty = 0;
+        }
+        else
+            first_dirty = INVALID_DIRTY_IDX;
+
+        page_list_add_scrub(pg, node, zone, --j, first_dirty);
         pg += 1 << j;
     }
 
@@ -849,13 +880,22 @@ static int reserve_offlined_page(struct page_info *head)
 {
     unsigned int node = phys_to_nid(page_to_maddr(head));
     int zone = page_to_zone(head), i, head_order = PFN_ORDER(head), count = 0;
-    struct page_info *cur_head;
+    struct page_info *cur_head, *first_dirty_pg = NULL;
     int cur_order;
 
     ASSERT(spin_is_locked(&heap_lock));
 
     cur_head = head;
 
+    /*
+     * We may break the buddy so let's mark the head as clean. Then, when
+     * merging chunks back into the heap, we will see whether the chunk has
+     * unscrubbed pages and set its first_dirty properly.
+     */
+    if ( head->u.free.first_dirty != INVALID_DIRTY_IDX )
+        first_dirty_pg = head + head->u.free.first_dirty;
+    head->u.free.first_dirty = INVALID_DIRTY_IDX;
+
     page_list_del(head, &heap(node, zone, head_order));
 
     while ( cur_head < (head + (1 << head_order)) )
@@ -873,6 +913,8 @@ static int reserve_offlined_page(struct page_info *head)
 
         while ( cur_order < head_order )
         {
+            unsigned int first_dirty = INVALID_DIRTY_IDX;
+
             next_order = cur_order + 1;
 
             if ( (cur_head + (1 << next_order)) >= (head + ( 1 << head_order)) )
@@ -892,8 +934,25 @@ static int reserve_offlined_page(struct page_info *head)
             {
             merge:
                 /* We don't consider merging outside the head_order. */
-                page_list_add_tail(cur_head, &heap(node, zone, cur_order));
-                PFN_ORDER(cur_head) = cur_order;
+
+                /* See if any of the pages indeed need scrubbing. */
+                if ( first_dirty_pg && (cur_head + (1 << cur_order) > first_dirty_pg) )
+                {
+                    if ( cur_head < first_dirty_pg )
+                        i = first_dirty_pg - cur_head;
+                    else
+                        i = 0;
+
+                    for ( ; i < (1 << cur_order); i++ )
+                        if ( test_bit(_PGC_need_scrub,
+                                      &cur_head[i].count_info) )
+                        {
+                            first_dirty = i;
+                            break;
+                        }
+                }
+                page_list_add_scrub(cur_head, node, zone,
+                                    cur_order, first_dirty);
                 cur_head += (1 << cur_order);
                 break;
             }
@@ -919,9 +978,53 @@ static int reserve_offlined_page(struct page_info *head)
     return count;
 }
 
+static void scrub_free_pages(unsigned int node)
+{
+    struct page_info *pg;
+    unsigned int zone;
+
+    ASSERT(spin_is_locked(&heap_lock));
+
+    if ( !node_need_scrub[node] )
+        return;
+
+    for ( zone = 0; zone < NR_ZONES; zone++ )
+    {
+        unsigned int order = MAX_ORDER;
+
+        do {
+            while ( !page_list_empty(&heap(node, zone, order)) )
+            {
+                unsigned int i;
+
+                /* Unscrubbed pages are always at the end of the list. */
+                pg = page_list_last(&heap(node, zone, order));
+                if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
+                    break;
+
+                for ( i = pg->u.free.first_dirty; i < (1U << order); i++ )
+                {
+                    if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
+                    {
+                        scrub_one_page(&pg[i]);
+                        pg[i].count_info &= ~PGC_need_scrub;
+                        node_need_scrub[node]--;
+                    }
+                }
+
+                page_list_del(pg, &heap(node, zone, order));
+                page_list_add_scrub(pg, node, zone, order, INVALID_DIRTY_IDX);
+
+                if ( node_need_scrub[node] == 0 )
+                    return;
+            }
+        } while ( order-- != 0 );
+    }
+}
+
 /* Free 2^@order set of pages. */
 static void free_heap_pages(
-    struct page_info *pg, unsigned int order)
+    struct page_info *pg, unsigned int order, bool need_scrub)
 {
     unsigned long mask, mfn = page_to_mfn(pg);
     unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
@@ -961,10 +1064,20 @@ static void free_heap_pages(
         /* This page is not a guest frame any more. */
         page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
         set_gpfn_from_mfn(mfn + i, INVALID_M2P_ENTRY);
+
+        if ( need_scrub )
+            pg[i].count_info |= PGC_need_scrub;
     }
 
     avail[node][zone] += 1 << order;
     total_avail_pages += 1 << order;
+    if ( need_scrub )
+    {
+        node_need_scrub[node] += 1 << order;
+        pg->u.free.first_dirty = 0;
+    }
+    else
+        pg->u.free.first_dirty = INVALID_DIRTY_IDX;
 
     if ( tmem_enabled() )
         midsize_alloc_zone_pages = max(
@@ -977,35 +1090,53 @@ static void free_heap_pages(
 
         if ( (page_to_mfn(pg) & mask) )
         {
+            struct page_info *predecessor = pg - mask;
+
             /* Merge with predecessor block? */
-            if ( !mfn_valid(_mfn(page_to_mfn(pg-mask))) ||
-                 !page_state_is(pg-mask, free) ||
-                 (PFN_ORDER(pg-mask) != order) ||
-                 (phys_to_nid(page_to_maddr(pg-mask)) != node) )
+            if ( !mfn_valid(_mfn(page_to_mfn(predecessor))) ||
+                 !page_state_is(predecessor, free) ||
+                 (PFN_ORDER(predecessor) != order) ||
+                 (phys_to_nid(page_to_maddr(predecessor)) != node) )
                 break;
-            pg -= mask;
-            page_list_del(pg, &heap(node, zone, order));
+
+            page_list_del(predecessor, &heap(node, zone, order));
+
+            if ( predecessor->u.free.first_dirty != INVALID_DIRTY_IDX )
+                need_scrub = true;
+                /* ... and keep predecessor's first_dirty. */
+            else if ( pg->u.free.first_dirty != INVALID_DIRTY_IDX )
+                predecessor->u.free.first_dirty = (1U << order) +
+                                                  pg->u.free.first_dirty;
+
+            pg = predecessor;
         }
         else
         {
+            struct page_info *successor = pg + mask;
+
             /* Merge with successor block? */
-            if ( !mfn_valid(_mfn(page_to_mfn(pg+mask))) ||
-                 !page_state_is(pg+mask, free) ||
-                 (PFN_ORDER(pg+mask) != order) ||
-                 (phys_to_nid(page_to_maddr(pg+mask)) != node) )
+            if ( !mfn_valid(_mfn(page_to_mfn(successor))) ||
+                 !page_state_is(successor, free) ||
+                 (PFN_ORDER(successor) != order) ||
+                 (phys_to_nid(page_to_maddr(successor)) != node) )
                 break;
-            page_list_del(pg + mask, &heap(node, zone, order));
+            page_list_del(successor, &heap(node, zone, order));
+
+            if ( successor->u.free.first_dirty != INVALID_DIRTY_IDX )
+                need_scrub = true;
         }
 
         order++;
     }
 
-    PFN_ORDER(pg) = order;
-    page_list_add_tail(pg, &heap(node, zone, order));
+    page_list_add_scrub(pg, node, zone, order, pg->u.free.first_dirty);
 
     if ( tainted )
         reserve_offlined_page(pg);
 
+    if ( need_scrub )
+        scrub_free_pages(node);
+
     spin_unlock(&heap_lock);
 }
 
@@ -1226,7 +1357,7 @@ unsigned int online_page(unsigned long mfn, uint32_t *status)
     spin_unlock(&heap_lock);
 
     if ( (y & PGC_state) == PGC_state_offlined )
-        free_heap_pages(pg, 0);
+        free_heap_pages(pg, 0, false);
 
     return ret;
 }
@@ -1295,7 +1426,7 @@ static void init_heap_pages(
             nr_pages -= n;
         }
 
-        free_heap_pages(pg+i, 0);
+        free_heap_pages(pg + i, 0, false);
     }
 }
 
@@ -1622,7 +1753,7 @@ void free_xenheap_pages(void *v, unsigned int order)
 
     memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
 
-    free_heap_pages(virt_to_page(v), order);
+    free_heap_pages(virt_to_page(v), order, false);
 }
 
 #else
@@ -1676,12 +1807,9 @@ void free_xenheap_pages(void *v, unsigned int order)
     pg = virt_to_page(v);
 
     for ( i = 0; i < (1u << order); i++ )
-    {
-        scrub_one_page(&pg[i]);
         pg[i].count_info &= ~PGC_xen_heap;
-    }
 
-    free_heap_pages(pg, order);
+    free_heap_pages(pg, order, true);
 }
 
 #endif
@@ -1790,7 +1918,7 @@ struct page_info *alloc_domheap_pages(
     if ( d && !(memflags & MEMF_no_owner) &&
          assign_pages(d, pg, order, memflags) )
     {
-        free_heap_pages(pg, order);
+        free_heap_pages(pg, order, false);
         return NULL;
     }
     
@@ -1858,11 +1986,7 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
             scrub = 1;
         }
 
-        if ( unlikely(scrub) )
-            for ( i = 0; i < (1 << order); i++ )
-                scrub_one_page(&pg[i]);
-
-        free_heap_pages(pg, order);
+        free_heap_pages(pg, order, scrub);
     }
 
     if ( drop_dom_ref )
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 13c673a..889a85e 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -44,7 +44,16 @@ struct page_info
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
             /* Do TLBs need flushing for safety before next page use? */
-            bool_t need_tlbflush;
+            unsigned long need_tlbflush:1;
+
+            /*
+             * Index of the first *possibly* unscrubbed page in the buddy.
+             * The field is MAX_ORDER + 2 bits wide so that, in addition to
+             * any valid page index, it can also hold INVALID_DIRTY_IDX.
+             */
+#define INVALID_DIRTY_IDX (-1UL & (((1UL<<MAX_ORDER) + 2) - 1))
+            unsigned long first_dirty:MAX_ORDER + 2;
+
         } free;
 
     } u;
@@ -107,6 +116,13 @@ struct page_info
 #define PGC_count_width   PG_shift(9)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
+/*
+ * Page needs to be scrubbed. Since this bit can only be set on a page that is
+ * free (i.e. in PGC_state_free) we can reuse PGC_allocated bit.
+ */
+#define _PGC_need_scrub   _PGC_allocated
+#define PGC_need_scrub    PGC_allocated
+
 extern unsigned long xenheap_mfn_start, xenheap_mfn_end;
 extern vaddr_t xenheap_virt_end;
 #ifdef CONFIG_ARM_64
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index d33599d..cd00bef 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -88,7 +88,15 @@ struct page_info
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
         struct {
             /* Do TLBs need flushing for safety before next page use? */
-            bool_t need_tlbflush;
+            unsigned long need_tlbflush:1;
+
+            /*
+             * Index of the first *possibly* unscrubbed page in the buddy.
+             * The field is MAX_ORDER + 2 bits wide so that, in addition to
+             * any valid page index, it can also hold INVALID_DIRTY_IDX.
+             */
+#define INVALID_DIRTY_IDX (-1UL & (((1UL<<MAX_ORDER) + 2) - 1))
+            unsigned long first_dirty:MAX_ORDER + 2;
         } free;
 
     } u;
@@ -233,6 +241,13 @@ struct page_info
 #define PGC_count_width   PG_shift(9)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
+/*
+ * Page needs to be scrubbed. Since this bit can only be set on a page that is
+ * free (i.e. in PGC_state_free) we can reuse PGC_allocated bit.
+ */
+#define _PGC_need_scrub   _PGC_allocated
+#define PGC_need_scrub    PGC_allocated
+
 struct spage_info
 {
        unsigned long type_info;
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel
