
[Xen-devel] [PATCH 3/5] xen: sched-null: support soft-affinity



The null scheduler does not really use hard-affinity for
scheduling; it uses it for 'placement', i.e., for deciding
to which pCPU to statically assign a vCPU.

Let's use soft-affinity in the same way, with, of course, the
difference that, if there's no free pCPU within the vCPU's
soft-affinity, we fall back to checking the hard-affinity,
instead of putting the vCPU in the waitqueue right away.

This has no impact on the scheduling overhead, because
soft-affinity is only considered in cold paths (e.g., when a
vCPU joins the scheduler for the first time, or is manually
moved between pCPUs by the user).
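
As a rough illustration of the placement logic described above, here
is a minimal C sketch. This is not the actual Xen code: cpumasks are
modelled as plain 64-bit bitmaps, the names (vcpu_aff, first_cpu,
pick_cpu_sketch, NO_CPU) are made up for the example, and the fast
check for the vCPU's current processor is omitted.

#include <stdint.h>

#define NO_CPU  64  /* sentinel: no suitable pCPU found */

/* Simplified vCPU: affinities as 64-bit pCPU bitmaps (illustration only). */
struct vcpu_aff {
    uint64_t hard_affinity;
    uint64_t soft_affinity;
};

/* Lowest-numbered pCPU set in the mask, or NO_CPU if the mask is empty. */
static unsigned int first_cpu(uint64_t mask)
{
    for ( unsigned int c = 0; c < 64; c++ )
        if ( mask & (1ULL << c) )
            return c;
    return NO_CPU;
}

/*
 * Placement, as the patch does it: look for a free pCPU within the
 * soft-affinity first and, only if that fails, retry with the
 * hard-affinity. If both steps fail, the caller parks the vCPU in
 * the waitqueue.
 */
static unsigned int pick_cpu_sketch(const struct vcpu_aff *v,
                                    uint64_t pool_cpus, uint64_t free_cpus)
{
    /* Step 0 = soft-affinity, step 1 = hard-affinity. */
    for ( int step = 0; step < 2; step++ )
    {
        uint64_t mask = (step == 0) ? v->soft_affinity : v->hard_affinity;

        /* Skip the soft step if it would not narrow the hard-affinity. */
        if ( step == 0 && (mask & v->hard_affinity) == v->hard_affinity )
            continue;

        /* Candidates: in the cpupool, currently free, and in the affinity. */
        unsigned int cpu = first_cpu(mask & pool_cpus & free_cpus);
        if ( cpu != NO_CPU )
            return cpu;
    }

    return NO_CPU;  /* no luck: the vCPU goes to the waitqueue */
}

In the patch itself, this two-pass structure is what
for_each_affinity_balance_step(), affinity_balance_cpumask() and
has_soft_affinity() provide; the second, hard-affinity pass replaces
the old single cpumask_and() with v->cpu_hard_affinity.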

Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
---
Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
 xen/common/sched_null.c |  110 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 77 insertions(+), 33 deletions(-)

diff --git a/xen/common/sched_null.c b/xen/common/sched_null.c
index 610a150..19c7f0f 100644
--- a/xen/common/sched_null.c
+++ b/xen/common/sched_null.c
@@ -115,9 +115,11 @@ static inline struct null_dom *null_dom(const struct domain *d)
     return d->sched_priv;
 }
 
-static inline bool vcpu_check_affinity(struct vcpu *v, unsigned int cpu)
+static inline bool vcpu_check_affinity(struct vcpu *v, unsigned int cpu,
+                                       unsigned int balance_step)
 {
-    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity,
+    affinity_balance_cpumask(v, balance_step, cpumask_scratch_cpu(cpu));
+    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
                 cpupool_domain_cpumask(v->domain));
 
     return cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu));
@@ -279,31 +281,40 @@ static void null_dom_destroy(const struct scheduler *ops, struct domain *d)
  */
 static unsigned int pick_cpu(struct null_private *prv, struct vcpu *v)
 {
+    unsigned int bs;
     unsigned int cpu = v->processor, new_cpu;
     cpumask_t *cpus = cpupool_domain_cpumask(v->domain);
 
     ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
 
-    cpumask_and(cpumask_scratch_cpu(cpu), v->cpu_hard_affinity, cpus);
+    for_each_affinity_balance_step( bs )
+    {
+        if ( bs == BALANCE_SOFT_AFFINITY &&
+             !has_soft_affinity(v, v->cpu_hard_affinity) )
+            continue;
 
-    /*
-     * If our processor is free, or we are assigned to it, and it is also
-     * still valid and part of our affinity, just go for it.
-     * (Note that we may call vcpu_check_affinity(), but we deliberately
-     * don't, so we get to keep in the scratch cpumask what we have just
-     * put in it.)
-     */
-    if ( likely((per_cpu(npc, cpu).vcpu == NULL || per_cpu(npc, cpu).vcpu == v)
-                && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )
-        return cpu;
+        affinity_balance_cpumask(v, bs, cpumask_scratch_cpu(cpu));
+        cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu), cpus);
 
-    /* If not, just go for a free pCPU, within our affinity, if any */
-    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
-                &prv->cpus_free);
-    new_cpu = cpumask_first(cpumask_scratch_cpu(cpu));
+        /*
+         * If our processor is free, or we are assigned to it, and it is also
+         * still valid and part of our affinity, just go for it.
+         * (Note that we may call vcpu_check_affinity(), but we deliberately
+         * don't, so we get to keep in the scratch cpumask what we have just
+         * put in it.)
+         */
+        if ( likely((per_cpu(npc, cpu).vcpu == NULL || per_cpu(npc, cpu).vcpu == v)
+                    && cpumask_test_cpu(cpu, cpumask_scratch_cpu(cpu))) )
+            return cpu;
 
-    if ( likely(new_cpu != nr_cpu_ids) )
-        return new_cpu;
+        /* If not, just go for a free pCPU, within our affinity, if any */
+        cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
+                    &prv->cpus_free);
+        new_cpu = cpumask_first(cpumask_scratch_cpu(cpu));
+
+        if ( likely(new_cpu != nr_cpu_ids) )
+            return new_cpu;
+    }
 
     /*
      * If we didn't find any free pCPU, just pick any valid pcpu, even if
@@ -430,6 +441,7 @@ static void null_vcpu_insert(const struct scheduler *ops, struct vcpu *v)
 
 static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
 {
+    unsigned int bs;
     unsigned int cpu = v->processor;
     struct null_vcpu *wvc;
 
@@ -441,19 +453,27 @@ static void _vcpu_remove(struct null_private *prv, struct vcpu *v)
 
     /*
      * If v is assigned to a pCPU, let's see if there is someone waiting,
-     * suitable to be assigned to it.
+     * suitable to be assigned to it (prioritizing vcpus that have
+     * soft-affinity with cpu).
      */
-    list_for_each_entry( wvc, &prv->waitq, waitq_elem )
+    for_each_affinity_balance_step( bs )
     {
-        if ( vcpu_check_affinity(wvc->vcpu, cpu) )
+        list_for_each_entry( wvc, &prv->waitq, waitq_elem )
         {
-            list_del_init(&wvc->waitq_elem);
-            vcpu_assign(prv, wvc->vcpu, cpu);
-            cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
-            break;
+            if ( bs == BALANCE_SOFT_AFFINITY &&
+                 !has_soft_affinity(wvc->vcpu, wvc->vcpu->cpu_hard_affinity) )
+                continue;
+
+            if ( vcpu_check_affinity(wvc->vcpu, cpu, bs) )
+            {
+                list_del_init(&wvc->waitq_elem);
+                vcpu_assign(prv, wvc->vcpu, cpu);
+                cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+                spin_unlock(&prv->waitq_lock);
+                return;
+            }
         }
     }
-
     spin_unlock(&prv->waitq_lock);
 }
 
@@ -570,7 +590,8 @@ static void null_vcpu_migrate(const struct scheduler *ops, struct vcpu *v,
      *
      * In latter, all we can do is to park v in the waitqueue.
      */
-    if ( per_cpu(npc, new_cpu).vcpu == NULL && vcpu_check_affinity(v, new_cpu) )
+    if ( per_cpu(npc, new_cpu).vcpu == NULL &&
+         vcpu_check_affinity(v, new_cpu, BALANCE_HARD_AFFINITY) )
     {
         /* v might have been in the waitqueue, so remove it */
         spin_lock(&prv->waitq_lock);
@@ -633,6 +654,7 @@ static struct task_slice null_schedule(const struct scheduler *ops,
                                        s_time_t now,
                                        bool_t tasklet_work_scheduled)
 {
+    unsigned int bs;
     const unsigned int cpu = smp_processor_id();
     struct null_private *prv = null_priv(ops);
     struct null_vcpu *wvc;
@@ -656,13 +678,35 @@ static struct task_slice null_schedule(const struct scheduler *ops,
     if ( unlikely(ret.task == NULL) )
     {
         spin_lock(&prv->waitq_lock);
-        wvc = list_first_entry_or_null(&prv->waitq, struct null_vcpu, waitq_elem);
-        if ( wvc && vcpu_check_affinity(wvc->vcpu, cpu) )
+
+        if ( list_empty(&prv->waitq) )
+            goto unlock;
+
+        /*
+         * We scan the waitqueue twice, for prioritizing vcpus that have
+         * soft-affinity with cpu. This may look like something expensive to
+         * do here in null_schedule(), but it's actually fine, because we do
+         * it only in cases where a pcpu has no vcpu associated (e.g., as
+         * said above, the cpu has just joined a cpupool).
+         */
+        for_each_affinity_balance_step( bs )
         {
-            vcpu_assign(prv, wvc->vcpu, cpu);
-            list_del_init(&wvc->waitq_elem);
-            ret.task = wvc->vcpu;
+            list_for_each_entry( wvc, &prv->waitq, waitq_elem )
+            {
+                if ( bs == BALANCE_SOFT_AFFINITY &&
+                     !has_soft_affinity(wvc->vcpu, wvc->vcpu->cpu_hard_affinity) )
+                    continue;
+
+                if ( vcpu_check_affinity(wvc->vcpu, cpu, bs) )
+                {
+                    vcpu_assign(prv, wvc->vcpu, cpu);
+                    list_del_init(&wvc->waitq_elem);
+                    ret.task = wvc->vcpu;
+                    goto unlock;
+                }
+            }
         }
+ unlock:
         spin_unlock(&prv->waitq_lock);
     }
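
For completeness, the waitqueue rescan that the patch adds to
_vcpu_remove() and null_schedule() boils down to the same kind of
two-pass walk, this time over the waiting vCPUs rather than over the
pCPUs. Again, just a sketch with made-up names (vcpu_aff,
soft_is_effective, pick_waiter_sketch), an array instead of the real
waitqueue list, and the cpupool check omitted:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Same simplified model as in the earlier sketch: 64-bit pCPU bitmaps. */
struct vcpu_aff {
    uint64_t hard_affinity;
    uint64_t soft_affinity;
};

/* true if the soft-affinity actually narrows things for this vCPU. */
static bool soft_is_effective(const struct vcpu_aff *v)
{
    return (v->soft_affinity & v->hard_affinity) != v->hard_affinity;
}

/*
 * Two-pass scan of the waiters: first look for someone with
 * soft-affinity to 'cpu', then for anyone whose hard-affinity allows
 * 'cpu'. Returns the index of the vCPU to assign, or -1 if nobody fits.
 */
static int pick_waiter_sketch(const struct vcpu_aff *waiters, size_t nr,
                              unsigned int cpu)
{
    /* Step 0 = soft-affinity, step 1 = hard-affinity. */
    for ( int step = 0; step < 2; step++ )
    {
        for ( size_t i = 0; i < nr; i++ )
        {
            const struct vcpu_aff *w = &waiters[i];
            uint64_t mask = (step == 0) ? w->soft_affinity : w->hard_affinity;

            /* In the soft pass, skip vCPUs with no effective soft-affinity. */
            if ( step == 0 && !soft_is_effective(w) )
                continue;

            /* The chosen affinity must allow 'cpu'. */
            if ( mask & (1ULL << cpu) )
                return (int)i;
        }
    }

    return -1;
}

The real code additionally drops the waitqueue lock and raises
SCHEDULE_SOFTIRQ on the pCPU once a suitable waiter has been found, as
can be seen in the _vcpu_remove() hunk above.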
 

