[Xen-devel] [PATCH 1/2] sched: credit2: respect per-vcpu hard affinity



by making sure that vcpus only run on the pcpu(s) they are allowed to
run on, based on their hard affinity cpu masks.

Signed-off-by: Justin T. Weaver <jtweaver@xxxxxxxxxx>
---
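Note (commentary after the "---", not part of the commit message): every
pcpu-selection path in this patch follows the same rule: intersect the
candidate cpu mask with the vcpu's hard affinity, prefer the current pcpu if
it is still allowed, otherwise pick another allowed pcpu on the runqueue, and
fall back to the hard affinity restricted to the cpupool's online pcpus when
the runqueue offers no allowed pcpu at all. The standalone sketch below models
that selection rule with plain unsigned long bitmasks; first_cpu(),
pick_cpu_model() and the mask parameters are illustrative stand-ins, not
Xen's cpumask API.

/*
 * Standalone model of the hard-affinity pcpu selection used throughout
 * this patch.  Plain unsigned long bitmasks stand in for Xen's cpumask_t;
 * the function and parameter names are illustrative, not Xen APIs.
 */
#include <stdio.h>

#define NR_MODEL_CPUS 8

/* Return the lowest set bit (first allowed pcpu), or -1 if mask is empty. */
static int first_cpu(unsigned long mask)
{
    for ( int cpu = 0; cpu < NR_MODEL_CPUS; cpu++ )
        if ( mask & (1UL << cpu) )
            return cpu;
    return -1;
}

/*
 * Prefer the current pcpu if both the hard affinity and the runqueue's
 * active mask allow it; otherwise pick any allowed pcpu on that runqueue;
 * otherwise fall back to any online pcpu in the hard affinity.
 */
static int pick_cpu_model(int cur_cpu, unsigned long hard_affinity,
                          unsigned long runq_active, unsigned long pool_online)
{
    unsigned long mask = hard_affinity & runq_active;

    if ( mask == 0 )
        return first_cpu(hard_affinity & pool_online); /* safe fallback */

    if ( mask & (1UL << cur_cpu) )
        return cur_cpu;       /* leave it where it is */

    return first_cpu(mask);   /* affinity changed; move within the runqueue */
}

int main(void)
{
    /* vcpu pinned to pcpus {2,3}, currently on pcpu 0, runq active = {0,1}. */
    printf("picked pcpu %d\n", pick_cpu_model(0, 0x0c, 0x03, 0xff));
    return 0;
}

Compiled with any C99 compiler this prints "picked pcpu 2", i.e. the fallback
path selects the first online pcpu in the hard affinity mask.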
 xen/common/sched_credit2.c |  199 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 171 insertions(+), 28 deletions(-)

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 1bcd6c0..90e9cdf 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -501,8 +501,9 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
         goto tickle;
     }
     
-    /* Get a mask of idle, but not tickled */
+    /* Get a mask of idle, but not tickled, that new is allowed to run on. */
     cpumask_andnot(&mask, &rqd->idle, &rqd->tickled);
+    cpumask_and(&mask, &mask, new->vcpu->cpu_hard_affinity);
     
     /* If it's not empty, choose one */
     i = cpumask_cycle(cpu, &mask);
@@ -513,9 +514,11 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
     }
 
     /* Otherwise, look for the non-idle cpu with the lowest credit,
-     * skipping cpus which have been tickled but not scheduled yet */
+     * skipping cpus which have been tickled but not scheduled yet,
+     * that new is allowed to run on. */
     cpumask_andnot(&mask, &rqd->active, &rqd->idle);
     cpumask_andnot(&mask, &mask, &rqd->tickled);
+    cpumask_and(&mask, &mask, new->vcpu->cpu_hard_affinity);
 
     for_each_cpu(i, &mask)
     {
@@ -1038,6 +1041,7 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
     int i, min_rqi = -1, new_cpu;
     struct csched2_vcpu *svc = CSCHED2_VCPU(vc);
     s_time_t min_avgload;
+    cpumask_t temp_mask;
 
     BUG_ON(cpumask_empty(&prv->active_queues));
 
@@ -1053,7 +1057,7 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
      *
      * Since one of the runqueue locks is already held, we can't
      * just grab the prv lock.  Instead, we'll have to trylock, and
-     * do something else reasonable if we fail.
+     * return a safe cpu.
      */
 
     if ( !spin_trylock(&prv->lock) )
@@ -1063,9 +1067,23 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
             d2printk("%pv -\n", svc->vcpu);
             clear_bit(__CSFLAG_runq_migrate_request, &svc->flags);
         }
-        /* Leave it where it is for now.  When we actually pay attention
-         * to affinity we'll have to figure something out... */
-        return vc->processor;
+
+        /* Check vc's hard affinity mask with the run queue's active mask. */
+        cpumask_and(&temp_mask, vc->cpu_hard_affinity, &svc->rqd->active);
+        if ( cpumask_empty(&temp_mask) )
+        {
+            /* Can't be assigned to current runqueue; return a safe pcpu. */
+            cpumask_and(&temp_mask, vc->cpu_hard_affinity,
+                cpupool_online_cpumask(vc->domain->cpupool));
+            return cpumask_any(&temp_mask);
+        }
+        else
+            if ( cpumask_test_cpu(vc->processor, &temp_mask) )
+                /* Leave it where it is. */
+                return vc->processor;
+            else
+                /* Same runq, different cpu; affinity must have changed. */
+                return cpumask_any(&temp_mask);
     }
 
     /* First check to see if we're here because someone else suggested a place
@@ -1081,13 +1099,17 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         else
         {
             d2printk("%pv +\n", svc->vcpu);
-            new_cpu = cpumask_cycle(vc->processor, &svc->migrate_rqd->active);
-            goto out_up;
+            cpumask_and(&temp_mask, vc->cpu_hard_affinity,
+                &svc->migrate_rqd->active);
+            if ( !cpumask_empty(&temp_mask) )
+            {
+                new_cpu = cpumask_any(&temp_mask);
+                goto out_up;
+            }
+            /* Fall-through to normal cpu pick */
         }
     }
 
-    /* FIXME: Pay attention to cpu affinity */
-
     min_avgload = MAX_LOAD;
 
     /* Find the runqueue with the lowest instantaneous load */
@@ -1099,17 +1121,26 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         rqd = prv->rqd + i;
 
         /* If checking a different runqueue, grab the lock,
-         * read the avg, and then release the lock.
+         * check hard affinity, read the avg, and then release the lock.
          *
          * If on our own runqueue, don't grab or release the lock;
          * but subtract our own load from the runqueue load to simulate
          * impartiality */
         if ( rqd == svc->rqd )
         {
+            cpumask_and(&temp_mask, vc->cpu_hard_affinity, &rqd->active);
+            if ( cpumask_empty(&temp_mask) )
+                continue;
             rqd_avgload = rqd->b_avgload - svc->avgload;
         }
         else if ( spin_trylock(&rqd->lock) )
         {
+            cpumask_and(&temp_mask, vc->cpu_hard_affinity, &rqd->active);
+            if ( cpumask_empty(&temp_mask) )
+            {
+                spin_unlock(&rqd->lock);
+                continue;
+            }
             rqd_avgload = rqd->b_avgload;
             spin_unlock(&rqd->lock);
         }
@@ -1123,12 +1154,30 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         }
     }
 
-    /* We didn't find anyone (most likely because of spinlock contention); leave it where it is */
     if ( min_rqi == -1 )
-        new_cpu = vc->processor;
+    {
+        /* No runqs found (most likely because of spinlock contention). */
+        cpumask_and(&temp_mask, vc->cpu_hard_affinity, &svc->rqd->active);
+        if ( cpumask_empty(&temp_mask) )
+        {
+            /* Can't be assigned to current runqueue; return a safe pcpu. */
+            cpumask_and(&temp_mask, vc->cpu_hard_affinity,
+                cpupool_online_cpumask(vc->domain->cpupool));
+            new_cpu = cpumask_any(&temp_mask);
+        }
+        else
+            if ( cpumask_test_cpu(vc->processor, &temp_mask) )
+                /* Leave it where it is. */
+                new_cpu = vc->processor;
+            else
+                /* Same runq, different cpu; affinity must have changed. */
+                new_cpu = cpumask_any(&temp_mask);
+    }
     else
     {
-        new_cpu = cpumask_cycle(vc->processor, &prv->rqd[min_rqi].active);
+        cpumask_and(&temp_mask, vc->cpu_hard_affinity,
+            &prv->rqd[min_rqi].active);
+        new_cpu = cpumask_any(&temp_mask);
         BUG_ON(new_cpu >= nr_cpu_ids);
     }
 
@@ -1197,22 +1246,40 @@ static void migrate(const struct scheduler *ops,
     }
     else
     {
-        int on_runq=0;
-        /* It's not running; just move it */
+        /* It's not running; move it if it's on a different runq than trqd. */
+        bool_t on_runq = 0;
+        cpumask_t temp_mask;
+
         d2printk("%pv %d-%d i\n", svc->vcpu, svc->rqd->id, trqd->id);
+
+        /* Re-assign vcpu's processor if its current one is not in the
+         * intersection of its hard affinity and trqd's active mask. */
+        cpumask_and(&temp_mask, svc->vcpu->cpu_hard_affinity, &trqd->active);
+        if ( !cpumask_test_cpu(svc->vcpu->processor, &temp_mask) )
+            svc->vcpu->processor = cpumask_any(&temp_mask);
+
         if ( __vcpu_on_runq(svc) )
+            on_runq = 1;
+
+        /* If the runqs are different, move svc to trqd. */
+        if ( svc->rqd != trqd )
         {
-            __runq_remove(svc);
-            update_load(ops, svc->rqd, svc, -1, now);
-            on_runq=1;
+            if ( on_runq )
+            {
+                __runq_remove(svc);
+                update_load(ops, svc->rqd, svc, -1, now);
+            }
+            __runq_deassign(svc);
+            __runq_assign(svc, trqd);
+            if ( on_runq )
+            {
+                update_load(ops, svc->rqd, svc, 1, now);
+                runq_insert(ops, svc->vcpu->processor, svc);
+            }
         }
-        __runq_deassign(svc);
-        svc->vcpu->processor = cpumask_any(&trqd->active);
-        __runq_assign(svc, trqd);
+
         if ( on_runq )
         {
-            update_load(ops, svc->rqd, svc, 1, now);
-            runq_insert(ops, svc->vcpu->processor, svc);
             runq_tickle(ops, svc->vcpu->processor, svc, now);
         }
     }
@@ -1224,6 +1291,7 @@ static void balance_load(const struct scheduler *ops, int cpu, s_time_t now)
     struct csched2_private *prv = CSCHED2_PRIV(ops);
     int i, max_delta_rqi = -1;
     struct list_head *push_iter, *pull_iter;
+    cpumask_t temp_mask;
 
     balance_state_t st = { .best_push_svc = NULL, .best_pull_svc = NULL };
     
@@ -1250,6 +1318,11 @@ retry:
     for_each_cpu(i, &prv->active_queues)
     {
         s_time_t delta;
+        /* true if there are no vcpus to push due to hard affinity */
+        bool_t ha_no_push = 1;
+        /* true if there are no vcpus to pull due to hard affinity */
+        bool_t ha_no_pull = 1;
+        struct list_head *iter;
         
         st.orqd = prv->rqd + i;
 
@@ -1257,6 +1330,47 @@ retry:
              || !spin_trylock(&st.orqd->lock) )
             continue;
 
+        /*
+         * If due to hard affinity there are no vcpus that can be
+         * pulled or pushed, move to the next runq in the loop.
+         */
+
+        /* See if there are any vcpus that can be pushed from lrqd to orqd. */
+        list_for_each( iter, &st.lrqd->svc )
+        {
+            struct csched2_vcpu * svc =
+                list_entry(iter, struct csched2_vcpu, rqd_elem);
+            cpumask_and(&temp_mask, svc->vcpu->cpu_hard_affinity,
+                &st.orqd->active);
+            if ( !cpumask_empty(&temp_mask) )
+            {
+                /* vcpu can be pushed from lrqd to orqd. */
+                ha_no_push = 0;
+                break;
+            }
+        }
+
+        /* See if there are any vcpus that can be pulled from orqd to lrqd. */
+        list_for_each( iter, &st.orqd->svc )
+        {
+            struct csched2_vcpu * svc =
+                list_entry(iter, struct csched2_vcpu, rqd_elem);
+            cpumask_and(&temp_mask, svc->vcpu->cpu_hard_affinity,
+                &st.lrqd->active);
+            if ( !cpumask_empty(&temp_mask) )
+            {
+                /* vcpu can be pulled from orqd to lrqd. */
+                ha_no_pull = 0;
+                break;
+            }
+        }
+
+        if ( ha_no_push && ha_no_pull )
+        {
+            spin_unlock(&st.orqd->lock);
+            continue;
+        }
+
         __update_runq_load(ops, st.orqd, 0, now);
     
         delta = st.lrqd->b_avgload - st.orqd->b_avgload;
@@ -1330,6 +1444,12 @@ retry:
         if ( test_bit(__CSFLAG_runq_migrate_request, &push_svc->flags) )
             continue;
 
+        /* Skip if it can't run on the destination runq. */
+        cpumask_and(&temp_mask, push_svc->vcpu->cpu_hard_affinity,
+            &st.orqd->active);
+        if ( cpumask_empty(&temp_mask) )
+            continue;
+
         list_for_each( pull_iter, &st.orqd->svc )
         {
            struct csched2_vcpu * pull_svc = list_entry(pull_iter, struct csched2_vcpu, rqd_elem);
@@ -1338,11 +1458,17 @@ retry:
             {
                 __update_svc_load(ops, pull_svc, 0, now);
             }
-        
+
             /* Skip this one if it's already been flagged to migrate */
             if ( test_bit(__CSFLAG_runq_migrate_request, &pull_svc->flags) )
                 continue;
 
+            /* Skip if it can't run on the destination runq. */
+            cpumask_and(&temp_mask, pull_svc->vcpu->cpu_hard_affinity,
+                &st.lrqd->active);
+            if ( cpumask_empty(&temp_mask) )
+                continue;
+
             consider(&st, push_svc, pull_svc);
         }
 
@@ -1355,11 +1481,17 @@ retry:
     list_for_each( pull_iter, &st.orqd->svc )
     {
        struct csched2_vcpu * pull_svc = list_entry(pull_iter, struct csched2_vcpu, rqd_elem);
-        
+
         /* Skip this one if it's already been flagged to migrate */
         if ( test_bit(__CSFLAG_runq_migrate_request, &pull_svc->flags) )
             continue;
 
+        /* Skip if it can't run on the destination runq. */
+        cpumask_and(&temp_mask, pull_svc->vcpu->cpu_hard_affinity,
+            &st.lrqd->active);
+        if ( cpumask_empty(&temp_mask) )
+            continue;
+
         /* Consider pull only */
         consider(&st, NULL, pull_svc);
     }
@@ -1399,8 +1531,12 @@ csched2_vcpu_migrate(
 
     trqd = RQD(ops, new_cpu);
 
-    if ( trqd != svc->rqd )
-        migrate(ops, svc, trqd, NOW());
+    /*
+     * Call migrate even if svc->rqd == trqd; there may have been an
+     * affinity change that requires a call to runq_tickle for a new
+     * processor within the same run queue.
+     */
+    migrate(ops, svc, trqd, NOW());
 }
 
 static int
@@ -1610,6 +1746,13 @@ runq_candidate(struct csched2_runqueue_data *rqd,
     {
        struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
 
+        /*
+         * If vcpu is not allowed to run on this processor due to
+         * hard affinity, continue to the next vcpu on the queue.
+         */
+        if ( !cpumask_test_cpu(cpu, svc->vcpu->cpu_hard_affinity) )
+            continue;
+
         /* If this is on a different processor, don't pull it unless
          * its credit is at least CSCHED2_MIGRATE_RESIST higher. */
         if ( svc->vcpu->processor != cpu
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel