[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v5 6/8] mm: Keep heap accessible to others while scrubbing



Instead of scrubbing pages while holding the heap lock, we can mark
the buddy's head as being scrubbed and drop the lock temporarily.
If someone (most likely alloc_heap_pages()) tries to access
this chunk, it will signal the scrubber to abort the scrub by setting
the head's scrub_state to BUDDY_SCRUB_ABORT. The scrubber checks this
state after processing each page and stops its work as soon as it sees it.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
Changes in v5:
* Fixed off-by-one error in setting first_dirty
* Changed struct page_info.u.free to a union to permit use of ACCESS_ONCE in
  check_and_stop_scrub()
* Renamed PAGE_SCRUBBING etc. macros to BUDDY_SCRUBBING etc

 xen/common/page_alloc.c  | 105 +++++++++++++++++++++++++++++++++++++++++++++--
 xen/include/asm-arm/mm.h |  28 ++++++++-----
 xen/include/asm-x86/mm.h |  29 ++++++++-----
 3 files changed, 138 insertions(+), 24 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 4e2775f..f0e5399 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -687,6 +687,7 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
 {
     PFN_ORDER(pg) = order;
     pg->u.free.first_dirty = first_dirty;
+    pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
 
     if ( first_dirty != INVALID_DIRTY_IDX )
         page_list_add_tail(pg, &heap(node, zone, order));
@@ -694,6 +695,25 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state == BUDDY_SCRUBBING )
+    {
+        struct page_info pg;
+
+        head->u.free.scrub_state = BUDDY_SCRUB_ABORT;
+        spin_lock_kick();
+        for ( ; ; )
+        {
+            /* Can't ACCESS_ONCE() a bitfield. */
+            pg.u.free.val = ACCESS_ONCE(head->u.free.val);
+            if ( pg.u.free.scrub_state != BUDDY_SCRUB_ABORT )
+                break;
+            cpu_relax();
+        }
+    }
+}
+
 static struct page_info *get_free_buddy(unsigned int zone_lo,
                                         unsigned int zone_hi,
                                         unsigned int order, unsigned int memflags,
@@ -738,14 +758,19 @@ static struct page_info *get_free_buddy(unsigned int zone_lo,
             {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                 {
+                    if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
+                        return pg;
                     /*
                      * We grab single pages (order=0) even if they are
                      * unscrubbed. Given that scrubbing one page is fairly quick
                      * it is not worth breaking higher orders.
                      */
-                    if ( (order == 0) || use_unscrubbed ||
-                         pg->u.free.first_dirty == INVALID_DIRTY_IDX)
+                    if ( (order == 0) || use_unscrubbed )
+                    {
+                        check_and_stop_scrub(pg);
                         return pg;
+                    }
+
                     page_list_add_tail(pg, &heap(node, zone, j));
                 }
             }
@@ -928,6 +953,7 @@ static int reserve_offlined_page(struct page_info *head)
 
     cur_head = head;
 
+    check_and_stop_scrub(head);
     /*
      * We may break the buddy so let's mark the head as clean. Then, when
      * merging chunks back into the heap, we will see whether the chunk has
@@ -1084,6 +1110,29 @@ static unsigned int node_to_scrub(bool get_node)
     return closest;
 }
 
+struct scrub_wait_state {
+    struct page_info *pg;
+    unsigned int first_dirty;
+    bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state == BUDDY_SCRUB_ABORT )
+    {
+        /* There is a waiter for this buddy. Release it. */
+        st->drop = true;
+        st->pg->u.free.first_dirty = st->first_dirty;
+        smp_wmb();
+        st->pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+    }
+}
+
 bool scrub_free_pages(void)
 {
     struct page_info *pg;
@@ -1106,25 +1155,53 @@ bool scrub_free_pages(void)
         do {
             while ( !page_list_empty(&heap(node, zone, order)) )
             {
-                unsigned int i;
+                unsigned int i, dirty_cnt;
+                struct scrub_wait_state st;
 
                 /* Unscrubbed pages are always at the end of the list. */
                 pg = page_list_last(&heap(node, zone, order));
                 if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                     break;
 
+                ASSERT(!pg->u.free.scrub_state);
+                pg->u.free.scrub_state = BUDDY_SCRUBBING;
+
+                spin_unlock(&heap_lock);
+
+                dirty_cnt = 0;
+
                 for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
                 {
                     if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                     {
                         scrub_one_page(&pg[i]);
+                        /*
+                         * We can modify count_info without holding heap
+                         * lock since we effectively locked this buddy by
+                         * setting its scrub_state.
+                         */
                         pg[i].count_info &= ~PGC_need_scrub;
-                        node_need_scrub[node]--;
+                        dirty_cnt++;
                         cnt += 100; /* scrubbed pages add heavier weight. */
                     }
                     else
                         cnt++;
 
+                    if ( pg->u.free.scrub_state == BUDDY_SCRUB_ABORT )
+                    {
+                        /* Someone wants this chunk. Drop everything. */
+
+                        pg->u.free.first_dirty = (i == (1U << order) - 1) ?
+                            INVALID_DIRTY_IDX : i + 1; 
+                        smp_wmb();
+                        pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+
+                        spin_lock(&heap_lock);
+                        node_need_scrub[node] -= dirty_cnt;
+                        spin_unlock(&heap_lock);
+                        goto out_nolock;
+                    }
+
                     /*
                      * Scrub a few (8) pages before becoming eligible for
                      * preemption. But also count non-scrubbing loop iterations
@@ -1138,6 +1215,17 @@ bool scrub_free_pages(void)
                     }
                 }
 
+                st.pg = pg;
+                st.first_dirty = (i >= (1UL << order) - 1) ?
+                    INVALID_DIRTY_IDX : i + 1;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                node_need_scrub[node] -= dirty_cnt;
+
+                if ( st.drop )
+                    goto out;
+
                 if ( i >= (1U << order) - 1 )
                 {
                     page_list_del(pg, &heap(node, zone, order));
@@ -1146,6 +1234,8 @@ bool scrub_free_pages(void)
                 else
                     pg->u.free.first_dirty = i + 1;
 
+                pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+
                 if ( preempt || (node_need_scrub[node] == 0) )
                     goto out;
             }
@@ -1154,6 +1244,8 @@ bool scrub_free_pages(void)
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
@@ -1235,6 +1327,8 @@ static void free_heap_pages(
                  (phys_to_nid(page_to_maddr(predecessor)) != node) )
                 break;
 
+            check_and_stop_scrub(predecessor);
+
             page_list_del(predecessor, &heap(node, zone, order));
 
             if ( predecessor->u.free.first_dirty != INVALID_DIRTY_IDX )
@@ -1256,6 +1350,9 @@ static void free_heap_pages(
                  (PFN_ORDER(successor) != order) ||
                  (phys_to_nid(page_to_maddr(successor)) != node) )
                 break;
+
+            check_and_stop_scrub(successor);
+
             page_list_del(successor, &heap(node, zone, order));
 
             if ( successor->u.free.first_dirty != INVALID_DIRTY_IDX )
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 889a85e..625aa16 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -42,18 +42,26 @@ struct page_info
             unsigned long type_info;
         } inuse;
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
-        struct {
-            /* Do TLBs need flushing for safety before next page use? */
-            unsigned long need_tlbflush:1;
-
-            /*
-             * Index of the first *possibly* unscrubbed page in the buddy.
-             * One more than maximum possible order (MAX_ORDER+1) to
-             * accommodate INVALID_DIRTY_IDX.
-             */
+        union {
+            struct {
+                /* Do TLBs need flushing for safety before next page use? */
+                unsigned long need_tlbflush:1;
+
+                /*
+                 * Index of the first *possibly* unscrubbed page in the buddy.
+                 * One more than maximum possible order (MAX_ORDER+1) to
+                 * accommodate INVALID_DIRTY_IDX.
+                 */
 #define INVALID_DIRTY_IDX (-1UL & (((1UL<<MAX_ORDER) + 2) - 1))
-            unsigned long first_dirty:MAX_ORDER + 2;
+                unsigned long first_dirty:MAX_ORDER + 2;
+
+#define BUDDY_NOT_SCRUBBING   0
+#define BUDDY_SCRUBBING       1
+#define BUDDY_SCRUB_ABORT     2
+                unsigned long scrub_state:2;
+            };
 
+           unsigned long val;
         } free;
 
     } u;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index cd00bef..db6f3a5 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -86,17 +86,26 @@ struct page_info
         } sh;
 
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
-        struct {
-            /* Do TLBs need flushing for safety before next page use? */
-            unsigned long need_tlbflush:1;
-
-            /*
-             * Index of the first *possibly* unscrubbed page in the buddy.
-             * One more than maximum possible order (MAX_ORDER+1) to
-             * accommodate INVALID_DIRTY_IDX.
-             */
+        union {
+           struct {
+               /* Do TLBs need flushing for safety before next page use? */
+               unsigned long need_tlbflush:1;
+
+               /*
+                * Index of the first *possibly* unscrubbed page in the buddy.
+                * One more than maximum possible order (MAX_ORDER+1) to
+                * accommodate INVALID_DIRTY_IDX.
+                */
 #define INVALID_DIRTY_IDX (-1UL & (((1UL<<MAX_ORDER) + 2) - 1))
-            unsigned long first_dirty:MAX_ORDER + 2;
+               unsigned long first_dirty:MAX_ORDER + 2;
+
+#define BUDDY_NOT_SCRUBBING   0
+#define BUDDY_SCRUBBING       1
+#define BUDDY_SCRUB_ABORT     2
+               unsigned long scrub_state:2;
+           };
+
+           unsigned long val;
         } free;
 
     } u;
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.