[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v6 05/20] xen/sched: support allocating multiple vcpus into one sched unit



With a scheduling granularity greater than 1, multiple vcpus share the
same struct sched_unit. Support that.

Setting the initial processor must be done carefully: we can't use
sched_set_res(), as that relies on for_each_sched_unit_vcpu(), which in
turn needs the vcpu to already be a member of the domain's vcpu linked
list, which isn't the case yet.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
Reviewed-by: Dario Faggioli <dfaggioli@xxxxxxxx>
---
V4:
- merge patch 36 of V3 into this one (Jan Beulich)
- add some comments (Jan Beulich)
- use unit_id instead of vcpu_list->vcpu_id (Jan Beulich)
---
 xen/common/schedule.c | 97 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 76 insertions(+), 21 deletions(-)

diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 36b1d3df6e..37002b4c0e 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -349,7 +349,7 @@ static void sched_spin_unlock_double(spinlock_t *lock1, spinlock_t *lock2,
     spin_unlock_irqrestore(lock1, flags);
 }
 
-static void sched_free_unit(struct sched_unit *unit)
+static void sched_free_unit_mem(struct sched_unit *unit)
 {
     struct sched_unit *prev_unit;
     struct domain *d = unit->domain;
@@ -368,8 +368,6 @@ static void sched_free_unit(struct sched_unit *unit)
         }
     }
 
-    unit->vcpu_list->sched_unit = NULL;
-
     free_cpumask_var(unit->cpu_hard_affinity);
     free_cpumask_var(unit->cpu_hard_affinity_saved);
     free_cpumask_var(unit->cpu_soft_affinity);
@@ -377,18 +375,65 @@ static void sched_free_unit(struct sched_unit *unit)
     xfree(unit);
 }
 
+static void sched_free_unit(struct sched_unit *unit, struct vcpu *v)
+{
+    struct vcpu *vunit;
+    unsigned int cnt = 0;
+
+    /* Don't count to be released vcpu, might be not in vcpu list yet. */
+    for_each_sched_unit_vcpu ( unit, vunit )
+        if ( vunit != v )
+            cnt++;
+
+    v->sched_unit = NULL;
+    unit->runstate_cnt[v->runstate.state]--;
+
+    if ( unit->vcpu_list == v )
+        unit->vcpu_list = v->next_in_list;
+
+    if ( !cnt )
+        sched_free_unit_mem(unit);
+}
+
+static void sched_unit_add_vcpu(struct sched_unit *unit, struct vcpu *v)
+{
+    v->sched_unit = unit;
+
+    /* All but idle vcpus are allocated with sequential vcpu_id. */
+    if ( !unit->vcpu_list || unit->vcpu_list->vcpu_id > v->vcpu_id )
+    {
+        unit->vcpu_list = v;
+        /*
+         * unit_id is always the same as lowest vcpu_id of unit.
+         * This is used for stopping for_each_sched_unit_vcpu() loop and in
+         * order to support cpupools with different granularities.
+         */
+        unit->unit_id = v->vcpu_id;
+    }
+    unit->runstate_cnt[v->runstate.state]++;
+}
+
 static struct sched_unit *sched_alloc_unit(struct vcpu *v)
 {
     struct sched_unit *unit, **prev_unit;
     struct domain *d = v->domain;
 
+    for_each_sched_unit ( d, unit )
+        if ( unit->unit_id / sched_granularity ==
+             v->vcpu_id / sched_granularity )
+            break;
+
+    if ( unit )
+    {
+        sched_unit_add_vcpu(unit, v);
+        return unit;
+    }
+
     if ( (unit = xzalloc(struct sched_unit)) == NULL )
         return NULL;
 
-    unit->vcpu_list = v;
-    unit->unit_id = v->vcpu_id;
     unit->domain = d;
-    unit->runstate_cnt[v->runstate.state]++;
+    sched_unit_add_vcpu(unit, v);
 
     for ( prev_unit = &d->sched_unit_list; *prev_unit;
           prev_unit = &(*prev_unit)->next_in_list )
@@ -404,12 +449,10 @@ static struct sched_unit *sched_alloc_unit(struct vcpu *v)
          !zalloc_cpumask_var(&unit->cpu_soft_affinity) )
         goto fail;
 
-    v->sched_unit = unit;
-
     return unit;
 
  fail:
-    sched_free_unit(unit);
+    sched_free_unit(unit, v);
     return NULL;
 }
 
@@ -459,21 +502,26 @@ int sched_init_vcpu(struct vcpu *v)
     else
         processor = sched_select_initial_cpu(v);
 
-    sched_set_res(unit, get_sched_res(processor));
-
     /* Initialise the per-vcpu timers. */
     spin_lock_init(&v->periodic_timer_lock);
-    init_timer(&v->periodic_timer, vcpu_periodic_timer_fn,
-               v, v->processor);
-    init_timer(&v->singleshot_timer, vcpu_singleshot_timer_fn,
-               v, v->processor);
-    init_timer(&v->poll_timer, poll_timer_fn,
-               v, v->processor);
+    init_timer(&v->periodic_timer, vcpu_periodic_timer_fn, v, processor);
+    init_timer(&v->singleshot_timer, vcpu_singleshot_timer_fn, v, processor);
+    init_timer(&v->poll_timer, poll_timer_fn, v, processor);
+
+    /* If this is not the first vcpu of the unit we are done. */
+    if ( unit->priv != NULL )
+    {
+        v->processor = processor;
+        return 0;
+    }
+
+    /* The first vcpu of an unit can be set via sched_set_res(). */
+    sched_set_res(unit, get_sched_res(processor));
 
     unit->priv = sched_alloc_udata(dom_scheduler(d), unit, d->sched_priv);
     if ( unit->priv == NULL )
     {
-        sched_free_unit(unit);
+        sched_free_unit(unit, v);
         return 1;
     }
 
@@ -633,9 +681,16 @@ void sched_destroy_vcpu(struct vcpu *v)
     kill_timer(&v->poll_timer);
     if ( test_and_clear_bool(v->is_urgent) )
         atomic_dec(&per_cpu(sched_urgent_count, v->processor));
-    sched_remove_unit(vcpu_scheduler(v), unit);
-    sched_free_udata(vcpu_scheduler(v), unit->priv);
-    sched_free_unit(unit);
+    /*
+     * Vcpus are being destroyed top-down. So being the first vcpu of an unit
+     * is the same as being the only one.
+     */
+    if ( unit->vcpu_list == v )
+    {
+        sched_remove_unit(vcpu_scheduler(v), unit);
+        sched_free_udata(vcpu_scheduler(v), unit->priv);
+        sched_free_unit(unit, v);
+    }
 }
 
 int sched_init_domain(struct domain *d, int poolid)
-- 
2.16.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.