[Xen-devel] [PATCH 1/2] sched: credit2: respect per-vcpu hard affinity



by making sure that vcpus only run on the pcpu(s) they are allowed to
run on, based on their hard affinity cpu masks.

Signed-off-by: Justin T. Weaver <jtweaver@xxxxxxxxxx>
---
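Note (commentary after the "---", not part of the commit message): every
pcpu-selection path in this patch follows the same rule: intersect the
candidate cpu mask with the vcpu's hard affinity, prefer the current pcpu if
it is still allowed, otherwise pick another allowed pcpu on the runqueue, and
fall back to the hard affinity restricted to the cpupool's online pcpus when
the runqueue offers no allowed pcpu at all. The standalone sketch below models
that selection rule with plain unsigned long bitmasks; first_cpu(),
pick_cpu_model() and the mask parameters are illustrative stand-ins, not
Xen's cpumask API.

/*
 * Standalone model of the hard-affinity pcpu selection used throughout
 * this patch.  Plain unsigned long bitmasks stand in for Xen's cpumask_t;
 * the function and parameter names are illustrative, not Xen APIs.
 */
#include <stdio.h>

#define NR_MODEL_CPUS 8

/* Return the lowest set bit (first allowed pcpu), or -1 if mask is empty. */
static int first_cpu(unsigned long mask)
{
    for ( int cpu = 0; cpu < NR_MODEL_CPUS; cpu++ )
        if ( mask & (1UL << cpu) )
            return cpu;
    return -1;
}

/*
 * Prefer the current pcpu if both the hard affinity and the runqueue's
 * active mask allow it; otherwise pick any allowed pcpu on that runqueue;
 * otherwise fall back to any online pcpu in the hard affinity.
 */
static int pick_cpu_model(int cur_cpu, unsigned long hard_affinity,
                          unsigned long runq_active, unsigned long pool_online)
{
    unsigned long mask = hard_affinity & runq_active;

    if ( mask == 0 )
        return first_cpu(hard_affinity & pool_online); /* safe fallback */

    if ( mask & (1UL << cur_cpu) )
        return cur_cpu;       /* leave it where it is */

    return first_cpu(mask);   /* affinity changed; move within the runqueue */
}

int main(void)
{
    /* vcpu pinned to pcpus {2,3}, currently on pcpu 0, runq active = {0,1}. */
    printf("picked pcpu %d\n", pick_cpu_model(0, 0x0c, 0x03, 0xff));
    return 0;
}

Compiled with any C99 compiler this prints "picked pcpu 2", i.e. the fallback
path selects the first online pcpu in the hard affinity mask.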
 xen/common/sched_credit2.c |  199 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 171 insertions(+), 28 deletions(-)

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 1bcd6c0..90e9cdf 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -501,8 +501,9 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
         goto tickle;
     }
     
-    /* Get a mask of idle, but not tickled */
+    /* Get a mask of idle, but not tickled, that new is allowed to run on. */
     cpumask_andnot(&mask, &rqd->idle, &rqd->tickled);
+    cpumask_and(&mask, &mask, new->vcpu->cpu_hard_affinity);
     
     /* If it's not empty, choose one */
     i = cpumask_cycle(cpu, &mask);
@@ -513,9 +514,11 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
     }
 
     /* Otherwise, look for the non-idle cpu with the lowest credit,
-     * skipping cpus which have been tickled but not scheduled yet */
+     * skipping cpus which have been tickled but not scheduled yet,
+     * that new is allowed to run on. */
     cpumask_andnot(&mask, &rqd->active, &rqd->idle);
     cpumask_andnot(&mask, &mask, &rqd->tickled);
+    cpumask_and(&mask, &mask, new->vcpu->cpu_hard_affinity);
 
     for_each_cpu(i, &mask)
     {
@@ -1038,6 +1041,7 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
     int i, min_rqi = -1, new_cpu;
     struct csched2_vcpu *svc = CSCHED2_VCPU(vc);
     s_time_t min_avgload;
+    cpumask_t temp_mask;
 
     BUG_ON(cpumask_empty(&prv->active_queues));
 
@@ -1053,7 +1057,7 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
      *
      * Since one of the runqueue locks is already held, we can't
      * just grab the prv lock.  Instead, we'll have to trylock, and
-     * do something else reasonable if we fail.
+     * return a safe cpu.
      */
 
     if ( !spin_trylock(&prv->lock) )
@@ -1063,9 +1067,23 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
             d2printk("%pv -\n", svc->vcpu);
             clear_bit(__CSFLAG_runq_migrate_request, &svc->flags);
         }
-        /* Leave it where it is for now.  When we actually pay attention
-         * to affinity we'll have to figure something out... */
-        return vc->processor;
+
+        /* Check vc's hard affinity mask with the run queue's active mask. */
+        cpumask_and(&temp_mask, vc->cpu_hard_affinity, &svc->rqd->active);
+        if ( cpumask_empty(&temp_mask) )
+        {
+            /* Can't be assigned to current runqueue; return a safe pcpu. */
+            cpumask_and(&temp_mask, vc->cpu_hard_affinity,
+                cpupool_online_cpumask(vc->domain->cpupool));
+            return cpumask_any(&temp_mask);
+        }
+        else
+            if ( cpumask_test_cpu(vc->processor, &temp_mask) )
+                /* Leave it where it is. */
+                return vc->processor;
+            else
+                /* Same runq, different cpu; affinity must have changed. */
+                return cpumask_any(&temp_mask);
     }
 
     /* First check to see if we're here because someone else suggested a place
@@ -1081,13 +1099,17 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         else
         {
             d2printk("%pv +\n", svc->vcpu);
-            new_cpu = cpumask_cycle(vc->processor, &svc->migrate_rqd->active);
-            goto out_up;
+            cpumask_and(&temp_mask, vc->cpu_hard_affinity,
+                &svc->migrate_rqd->active);
+            if ( !cpumask_empty(&temp_mask) )
+            {
+                new_cpu = cpumask_any(&temp_mask);
+                goto out_up;
+            }
+            /* Fall-through to normal cpu pick */
         }
     }
 
-    /* FIXME: Pay attention to cpu affinity */
-
     min_avgload = MAX_LOAD;
 
     /* Find the runqueue with the lowest instantaneous load */
@@ -1099,17 +1121,26 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         rqd = prv->rqd + i;
 
         /* If checking a different runqueue, grab the lock,
-         * read the avg, and then release the lock.
+         * check hard affinity, read the avg, and then release the lock.
          *
          * If on our own runqueue, don't grab or release the lock;
          * but subtract our own load from the runqueue load to simulate
          * impartiality */
         if ( rqd == svc->rqd )
         {
+            cpumask_and(&temp_mask, vc->cpu_hard_affinity, &rqd->active);
+            if ( cpumask_empty(&temp_mask) )
+                continue;
             rqd_avgload = rqd->b_avgload - svc->avgload;
         }
         else if ( spin_trylock(&rqd->lock) )
         {
+            cpumask_and(&temp_mask, vc->cpu_hard_affinity, &rqd->active);
+            if ( cpumask_empty(&temp_mask) )
+            {
+                spin_unlock(&rqd->lock);
+                continue;
+            }
             rqd_avgload = rqd->b_avgload;
             spin_unlock(&rqd->lock);
         }
@@ -1123,12 +1154,30 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         }
     }
 
-    /* We didn't find anyone (most likely because of spinlock contention); leave it where it is */
     if ( min_rqi == -1 )
-        new_cpu = vc->processor;
+    {
+        /* No runqs found (most likely because of spinlock contention). */
+        cpumask_and(&temp_mask, vc->cpu_hard_affinity, &svc->rqd->active);
+        if ( cpumask_empty(&temp_mask) )
+        {
+            /* Can't be assigned to current runqueue; return a safe pcpu. */
+            cpumask_and(&temp_mask, vc->cpu_hard_affinity,
+                cpupool_online_cpumask(vc->domain->cpupool));
+            new_cpu = cpumask_any(&temp_mask);
+        }
+        else
+            if ( cpumask_test_cpu(vc->processor, &temp_mask) )
+                /* Leave it where it is. */
+                new_cpu = vc->processor;
+            else
+                /* Same runq, different cpu; affinity must have changed. */
+                new_cpu = cpumask_any(&temp_mask);
+    }
     else
     {
-        new_cpu = cpumask_cycle(vc->processor, &prv->rqd[min_rqi].active);
+        cpumask_and(&temp_mask, vc->cpu_hard_affinity,
+            &prv->rqd[min_rqi].active);
+        new_cpu = cpumask_any(&temp_mask);
         BUG_ON(new_cpu >= nr_cpu_ids);
     }
 
@@ -1197,22 +1246,40 @@ static void migrate(const struct scheduler *ops,
     }
     else
     {
-        int on_runq=0;
-        /* It's not running; just move it */
+        /* It's not running; move it if it's on a different runq than trqd. */
+        bool_t on_runq = 0;
+        cpumask_t temp_mask;
+
         d2printk("%pv %d-%d i\n", svc->vcpu, svc->rqd->id, trqd->id);
+
+        /* Re-assign vcpu's processor if its current one is not in the
+         * intersection of its hard affinity and trqd's active mask. */
+        cpumask_and(&temp_mask, svc->vcpu->cpu_hard_affinity, &trqd->active);
+        if ( !cpumask_test_cpu(svc->vcpu->processor, &temp_mask) )
+            svc->vcpu->processor = cpumask_any(&temp_mask);
+
         if ( __vcpu_on_runq(svc) )
+            on_runq = 1;
+
+        /* If the runqs are different, move svc to trqd. */
+        if ( svc->rqd != trqd )
         {
-            __runq_remove(svc);
-            update_load(ops, svc->rqd, svc, -1, now);
-            on_runq=1;
+            if ( on_runq )
+            {
+                __runq_remove(svc);
+                update_load(ops, svc->rqd, svc, -1, now);
+            }
+            __runq_deassign(svc);
+            __runq_assign(svc, trqd);
+            if ( on_runq )
+            {
+                update_load(ops, svc->rqd, svc, 1, now);
+                runq_insert(ops, svc->vcpu->processor, svc);
+            }
         }
-        __runq_deassign(svc);
-        svc->vcpu->processor = cpumask_any(&trqd->active);
-        __runq_assign(svc, trqd);
+
         if ( on_runq )
         {
-            update_load(ops, svc->rqd, svc, 1, now);
-            runq_insert(ops, svc->vcpu->processor, svc);
             runq_tickle(ops, svc->vcpu->processor, svc, now);
         }
     }
@@ -1224,6 +1291,7 @@ static void balance_load(const struct scheduler *ops, int cpu, s_time_t now)
     struct csched2_private *prv = CSCHED2_PRIV(ops);
     int i, max_delta_rqi = -1;
     struct list_head *push_iter, *pull_iter;
+    cpumask_t temp_mask;
 
     balance_state_t st = { .best_push_svc = NULL, .best_pull_svc = NULL };
     
@@ -1250,6 +1318,11 @@ retry:
     for_each_cpu(i, &prv->active_queues)
     {
         s_time_t delta;
+        /* true if there are no vcpus to push due to hard affinity */
+        bool_t ha_no_push = 1;
+        /* true if there are no vcpus to pull due to hard affinity */
+        bool_t ha_no_pull = 1;
+        struct list_head *iter;
         
         st.orqd = prv->rqd + i;
 
@@ -1257,6 +1330,47 @@ retry:
              || !spin_trylock(&st.orqd->lock) )
             continue;
 
+        /*
+         * If due to hard affinity there are no vcpus that can be
+         * pulled or pushed, move to the next runq in the loop.
+         */
+
+        /* See if there are any vcpus that can be pushed from lrqd to orqd. */
+        list_for_each( iter, &st.lrqd->svc )
+        {
+            struct csched2_vcpu * svc =
+                list_entry(iter, struct csched2_vcpu, rqd_elem);
+            cpumask_and(&temp_mask, svc->vcpu->cpu_hard_affinity,
+                &st.orqd->active);
+            if ( !cpumask_empty(&temp_mask) )
+            {
+                /* vcpu can be pushed from lrqd to orqd. */
+                ha_no_push = 0;
+                break;
+            }
+        }
+
+        /* See if there are any vcpus that can be pulled from orqd to lrqd. */
+        list_for_each( iter, &st.orqd->svc )
+        {
+            struct csched2_vcpu * svc =
+                list_entry(iter, struct csched2_vcpu, rqd_elem);
+            cpumask_and(&temp_mask, svc->vcpu->cpu_hard_affinity,
+                &st.lrqd->active);
+            if ( !cpumask_empty(&temp_mask) )
+            {
+                /* vcpu can be pulled from orqd to lrqd. */
+                ha_no_pull = 0;
+                break;
+            }
+        }
+
+        if ( ha_no_push && ha_no_pull )
+        {
+            spin_unlock(&st.orqd->lock);
+            continue;
+        }
+
         __update_runq_load(ops, st.orqd, 0, now);
     
         delta = st.lrqd->b_avgload - st.orqd->b_avgload;
@@ -1330,6 +1444,12 @@ retry:
         if ( test_bit(__CSFLAG_runq_migrate_request, &push_svc->flags) )
             continue;
 
+        /* Skip if it can't run on the destination runq. */
+        cpumask_and(&temp_mask, push_svc->vcpu->cpu_hard_affinity,
+            &st.orqd->active);
+        if ( cpumask_empty(&temp_mask) )
+            continue;
+
         list_for_each( pull_iter, &st.orqd->svc )
         {
            struct csched2_vcpu * pull_svc = list_entry(pull_iter, struct csched2_vcpu, rqd_elem);
@@ -1338,11 +1458,17 @@ retry:
             {
                 __update_svc_load(ops, pull_svc, 0, now);
             }
-        
+
             /* Skip this one if it's already been flagged to migrate */
             if ( test_bit(__CSFLAG_runq_migrate_request, &pull_svc->flags) )
                 continue;
 
+            /* Skip if it can't run on the destination runq. */
+            cpumask_and(&temp_mask, pull_svc->vcpu->cpu_hard_affinity,
+                &st.lrqd->active);
+            if ( cpumask_empty(&temp_mask) )
+                continue;
+
             consider(&st, push_svc, pull_svc);
         }
 
@@ -1355,11 +1481,17 @@ retry:
     list_for_each( pull_iter, &st.orqd->svc )
     {
        struct csched2_vcpu * pull_svc = list_entry(pull_iter, struct csched2_vcpu, rqd_elem);
-        
+
         /* Skip this one if it's already been flagged to migrate */
         if ( test_bit(__CSFLAG_runq_migrate_request, &pull_svc->flags) )
             continue;
 
+        /* Skip if it can't run on the destination runq. */
+        cpumask_and(&temp_mask, pull_svc->vcpu->cpu_hard_affinity,
+            &st.lrqd->active);
+        if ( cpumask_empty(&temp_mask) )
+            continue;
+
         /* Consider pull only */
         consider(&st, NULL, pull_svc);
     }
@@ -1399,8 +1531,12 @@ csched2_vcpu_migrate(
 
     trqd = RQD(ops, new_cpu);
 
-    if ( trqd != svc->rqd )
-        migrate(ops, svc, trqd, NOW());
+    /*
+     * Call migrate even if svc->rqd == trqd; there may have been an
+     * affinity change that requires a call to runq_tickle for a new
+     * processor within the same run queue.
+     */
+    migrate(ops, svc, trqd, NOW());
 }
 
 static int
@@ -1610,6 +1746,13 @@ runq_candidate(struct csched2_runqueue_data *rqd,
     {
        struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
 
+        /*
+         * If vcpu is not allowed to run on this processor due to
+         * hard affinity, continue to the next vcpu on the queue.
+         */
+        if ( !cpumask_test_cpu(cpu, svc->vcpu->cpu_hard_affinity) )
+            continue;
+
         /* If this is on a different processor, don't pull it unless
          * its credit is at least CSCHED2_MIGRATE_RESIST higher. */
         if ( svc->vcpu->processor != cpu
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel