
[Xen-devel] [PATCH v4 6/8] mm: Keep heap accessible to others while scrubbing



Instead of scrubbing pages while holding the heap lock we can mark the
buddy's head as being scrubbed and drop the lock temporarily.
If someone (most likely alloc_heap_pages()) tries to access
this chunk it will signal the scrubber to abort the scrub by setting
the head's PAGE_SCRUB_ABORT bit. The scrubber checks this bit after
processing each page and stops its work as soon as it sees the bit set.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
Changes in v4:
* Drop unnecessary ACCESS_ONCE in scrub_free_pages, add smp_wmb()
  in scrub_continue()
* Keep track of first_dirty in scrub_wait_state
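
For illustration only (not part of the patch): a minimal standalone
sketch of the scrub/abort handshake described above. C11 atomics and a
pthread mutex stand in for the heap lock, spin_lock_cb()/spin_lock_kick(),
ACCESS_ONCE() and smp_wmb(); names such as heap_lock, BUDDY_PAGES and the
fake "scrub" delay are invented for the example and are not Xen code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define PAGE_SCRUBBING   (1 << 0)
#define PAGE_SCRUB_ABORT (1 << 1)
#define BUDDY_PAGES      64U

static pthread_mutex_t heap_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic unsigned char scrub_state;  /* head->u.free.scrub_state */
static unsigned int first_dirty;           /* head->u.free.first_dirty */

/* What the real code runs via spin_lock_cb() while waiting for the lock. */
static void scrub_continue(bool *drop, unsigned int done_up_to)
{
    if ( *drop )
        return;
    if ( atomic_load(&scrub_state) & PAGE_SCRUB_ABORT )
    {
        *drop = true;
        first_dirty = done_up_to;
        atomic_store(&scrub_state, 0);     /* release the waiter */
    }
}

/* Scrubber: owns the buddy while PAGE_SCRUBBING is set, lock dropped. */
static void *scrubber(void *arg)
{
    unsigned int i;
    bool drop = false;

    pthread_mutex_lock(&heap_lock);
    atomic_store(&scrub_state, PAGE_SCRUBBING);
    pthread_mutex_unlock(&heap_lock);

    for ( i = first_dirty; i < BUDDY_PAGES; i++ )
    {
        usleep(1000);                      /* pretend to zero page i */

        /* Check for a waiter after every page, as the patch does. */
        if ( atomic_load(&scrub_state) & PAGE_SCRUB_ABORT )
        {
            first_dirty = i + 1;           /* record progress ... */
            atomic_store(&scrub_state, 0); /* ... then release the waiter */
            return NULL;
        }
    }

    /* Re-take the lock, polling so a waiter already holding it gets freed. */
    while ( pthread_mutex_trylock(&heap_lock) )
        scrub_continue(&drop, BUDDY_PAGES);

    if ( !drop )
    {
        first_dirty = BUDDY_PAGES;         /* stands in for INVALID_DIRTY_IDX */
        atomic_store(&scrub_state, 0);
    }
    pthread_mutex_unlock(&heap_lock);
    return NULL;
}

/* Allocator: wants the buddy now; mirrors check_and_stop_scrub(). */
static void *allocator(void *arg)
{
    usleep(5000);                          /* arrive while the scrub runs */
    pthread_mutex_lock(&heap_lock);
    if ( atomic_load(&scrub_state) & PAGE_SCRUBBING )
    {
        atomic_fetch_or(&scrub_state, PAGE_SCRUB_ABORT);
        while ( atomic_load(&scrub_state) & PAGE_SCRUB_ABORT )
            ;                              /* cpu_relax() in the real code */
    }
    printf("allocator: buddy usable, first_dirty=%u\n", first_dirty);
    pthread_mutex_unlock(&heap_lock);
    return NULL;
}

int main(void)
{
    pthread_t s, a;

    pthread_create(&s, NULL, scrubber, NULL);
    pthread_create(&a, NULL, allocator, NULL);
    pthread_join(s, NULL);
    pthread_join(a, NULL);
    return 0;
}

Because the waiter only spins while holding the lock, and the scrubber
clears scrub_state either in direct response to PAGE_SCRUB_ABORT or only
after re-taking the lock, the waiter cannot be left spinning on a scrub
that has already finished. The seq_cst store that publishes first_dirty
before clearing scrub_state plays the role of the smp_wmb() added in v4.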

 xen/common/page_alloc.c  | 100 ++++++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-arm/mm.h |   4 ++
 xen/include/asm-x86/mm.h |   4 ++
 3 files changed, 103 insertions(+), 5 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 6e505b1..b365305 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -694,6 +694,17 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state & PAGE_SCRUBBING )
+    {
+        head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
+        spin_lock_kick();
+        while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+            cpu_relax();
+    }
+}
+
 static struct page_info *get_free_buddy(unsigned int zone_lo,
                                         unsigned int zone_hi,
                                         unsigned int order, unsigned int memflags,
@@ -738,9 +749,15 @@ static struct page_info *get_free_buddy(unsigned int zone_lo,
             {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                 {
-                    if ( (order == 0) || use_unscrubbed ||
-                         pg->u.free.first_dirty == INVALID_DIRTY_IDX)
+                    if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                         return pg;
+
+                    if ( (order == 0) || use_unscrubbed )
+                    {
+                        check_and_stop_scrub(pg);
+                        return pg;
+                    }
+
                     page_list_add_tail(pg, &heap(node, zone, j));
                 }
             }
@@ -925,6 +942,7 @@ static int reserve_offlined_page(struct page_info *head)
 
     cur_head = head;
 
+    check_and_stop_scrub(head);
     /*
      * We may break the buddy so let's mark the head as clean. Then, when
      * merging chunks back into the heap, we will see whether the chunk has
@@ -1069,6 +1087,29 @@ static unsigned int node_to_scrub(bool get_node)
     return closest;
 }
 
+struct scrub_wait_state {
+    struct page_info *pg;
+    unsigned int first_dirty;
+    bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+    {
+        /* There is a waiter for this buddy. Release it. */
+        st->drop = true;
+        st->pg->u.free.first_dirty = st->first_dirty;
+        smp_wmb();
+        st->pg->u.free.scrub_state = 0;
+    }
+}
+
 bool scrub_free_pages(void)
 {
     struct page_info *pg;
@@ -1090,24 +1131,51 @@ bool scrub_free_pages(void)
         do {
             while ( !page_list_empty(&heap(node, zone, order)) )
             {
-                unsigned int i;
+                unsigned int i, dirty_cnt;
+                struct scrub_wait_state st;
 
                 /* Unscrubbed pages are always at the end of the list. */
                 pg = page_list_last(&heap(node, zone, order));
                 if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                     break;
 
+                ASSERT(!pg->u.free.scrub_state);
+                pg->u.free.scrub_state = PAGE_SCRUBBING;
+
+                spin_unlock(&heap_lock);
+
+                dirty_cnt = 0;
                 for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
                 {
                     cnt++;
                     if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                     {
                         scrub_one_page(&pg[i]);
+                        /*
+                         * We can modify count_info without holding heap
+                         * lock since we effectively locked this buddy by
+                         * setting its scrub_state.
+                         */
                         pg[i].count_info &= ~PGC_need_scrub;
-                        node_need_scrub[node]--;
+                        dirty_cnt++;
                         cnt += 100; /* scrubbed pages add heavier weight. */
                     }
 
+                    if ( pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+                    {
+                        /* Someone wants this chunk. Drop everything. */
+
+                        pg->u.free.first_dirty = (i + 1 == (1U << order)) ?
+                            INVALID_DIRTY_IDX : i + 1;
+                        smp_wmb();
+                        pg->u.free.scrub_state = 0;
+
+                        spin_lock(&heap_lock);
+                        node_need_scrub[node] -= dirty_cnt;
+                        spin_unlock(&heap_lock);
+                        goto out_nolock;
+                    }
+
                     /*
                      * Scrub a few (8) pages before becoming eligible for
                      * preemption. But also count non-scrubbing loop iteration
@@ -1121,6 +1189,17 @@ bool scrub_free_pages(void)
                     }
                 }
 
+                st.pg = pg;
+                st.first_dirty = (i == (1UL << order)) ?
+                    INVALID_DIRTY_IDX : i + 1;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                node_need_scrub[node] -= dirty_cnt;
+
+                if ( st.drop )
+                    goto out;
+
                 if ( i == (1U << order) )
                 {
                     page_list_del(pg, &heap(node, zone, order));
@@ -1128,7 +1207,9 @@ bool scrub_free_pages(void)
                 }
                 else
                     pg->u.free.first_dirty = i + 1;
- 
+
+                pg->u.free.scrub_state = 0;
+
                 if ( preempt || (node_need_scrub[node] == 0) )
                     goto out;
             }
@@ -1137,6 +1218,8 @@ bool scrub_free_pages(void)
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
 }
@@ -1175,6 +1258,8 @@ static void free_heap_pages(
         if ( page_state_is(&pg[i], offlined) )
             tainted = 1;
 
+        pg[i].u.free.scrub_state = 0;
+
         /* If a page has no owner it will need no safety TLB flush. */
         pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
         if ( pg[i].u.free.need_tlbflush )
@@ -1218,6 +1303,8 @@ static void free_heap_pages(
                  (phys_to_nid(page_to_maddr(predecessor)) != node) )
                 break;
 
+            check_and_stop_scrub(predecessor);
+
             page_list_del(predecessor, &heap(node, zone, order));
 
             if ( predecessor->u.free.first_dirty != INVALID_DIRTY_IDX )
@@ -1240,6 +1327,9 @@ static void free_heap_pages(
                  (PFN_ORDER(successor) != order) ||
                  (phys_to_nid(page_to_maddr(successor)) != node) )
                 break;
+
+            check_and_stop_scrub(successor);
+
             page_list_del(successor, &heap(node, zone, order));
 
             need_scrub |= (successor->u.free.first_dirty != INVALID_DIRTY_IDX);
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 38d4fba..148d60b 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -46,6 +46,10 @@ struct page_info
             /* Index of the first *possibly* unscrubbed page in the buddy. */
 #define INVALID_DIRTY_IDX -1U
             unsigned int first_dirty;
+#define PAGE_SCRUBBING    (1<<0)
+#define PAGE_SCRUB_ABORT  (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index e20f161..f46b242 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -90,6 +90,10 @@ struct page_info
             /* Index of the first *possibly* unscrubbed page in the buddy. */
 #define INVALID_DIRTY_IDX -1U
             unsigned int first_dirty;
+#define PAGE_SCRUBBING    (1<<0)
+#define PAGE_SCRUB_ABORT  (1<<1)
+            unsigned char scrub_state;
+
             /* Do TLBs need flushing for safety before next page use? */
             bool_t need_tlbflush;
         } free;
-- 
1.8.3.1

