[Xen-devel] [RFC PATCH 1/1] xen: credit2: rb-tree for runqueues
The patch optimizes the sorted credit2 runqueue by replacing the simple
linked list with an rb-tree. Insertion then costs O(log n) instead of an
O(n) list walk, which improves performance and scalability when the number
of runnable vCPUs on a runqueue is large.
Signed-off-by: Praveen Kumar <kpraveen.lkml@xxxxxxxxx>
---
xen/common/sched_credit2.c | 94 ++++++++++++++++++++++++++++++----------------
1 file changed, 61 insertions(+), 33 deletions(-)
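As an aside, not part of the patch itself: below is a minimal, self-contained
sketch of the data-structure change the patch makes, written against the
Linux-style rbtree primitives that xen/rbtree.h provides (rb_link_node(),
rb_insert_color(), rb_first(), rb_entry()). The demo_vcpu, demo_runq_insert()
and demo_runq_peek() names are invented for the illustration only; the real
code in the diff below operates on struct csched2_vcpu instead.

#include <xen/rbtree.h>

/* Illustration only: a credit-keyed rb-tree mirroring the patch's approach. */
struct demo_vcpu {
    int credit;                   /* scheduling credit, the sort key */
    struct rb_node runq_elem;     /* linkage into the per-runqueue tree */
};

/* Insert so that rb_first() always returns the highest-credit vCPU,
 * i.e. the same "head = highest credit" invariant the old list kept. */
static void demo_runq_insert(struct rb_root *root, struct demo_vcpu *svc)
{
    struct rb_node **node = &root->rb_node, *parent = NULL;

    while ( *node )
    {
        struct demo_vcpu *entry = rb_entry(*node, struct demo_vcpu, runq_elem);

        parent = *node;
        if ( svc->credit > entry->credit )
            node = &parent->rb_left;   /* higher credit sorts towards the left */
        else
            node = &parent->rb_right;
    }
    rb_link_node(&svc->runq_elem, parent, node);
    rb_insert_color(&svc->runq_elem, root);
}

/* Peek at the best candidate via the left-most node. */
static struct demo_vcpu *demo_runq_peek(struct rb_root *root)
{
    struct rb_node *first = rb_first(root);

    return first ? rb_entry(first, struct demo_vcpu, runq_elem) : NULL;
}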
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 9a3e71f1c8..3802c2888f 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -25,6 +25,7 @@
#include <xen/trace.h>
#include <xen/cpu.h>
#include <xen/keyhandler.h>
+#include <xen/rbtree.h>
/* Meant only for helping developers during debugging. */
/* #define d2printk printk */
@@ -471,7 +472,7 @@ custom_param("credit2_runqueue", parse_credit2_runqueue);
struct csched2_runqueue_data {
spinlock_t lock; /* Lock for this runqueue */
- struct list_head runq; /* Ordered list of runnable vms */
+ struct rb_root runq; /* Runqueue is an rbtree */
int id; /* ID of this runqueue (-1 if invalid) */
int load; /* Instantaneous load (num of non-idle vcpus) */
@@ -536,7 +537,7 @@ struct csched2_vcpu {
s_time_t load_last_update; /* Last time average was updated */
s_time_t avgload; /* Decaying queue load */
- struct list_head runq_elem; /* On the runqueue (rqd->runq) */
+ struct rb_node runq_elem; /* On the runqueue (rqd->runq) */
struct list_head parked_elem; /* On the parked_vcpus list */
struct list_head rqd_elem; /* On csched2_runqueue_data's svc list */
struct csched2_runqueue_data *migrate_rqd; /* Pre-determined migr. target */
@@ -600,6 +601,29 @@ static inline bool has_cap(const struct csched2_vcpu *svc)
return svc->budget != STIME_MAX;
}
+static void runq_insert_rb(struct rb_root *root,
+ struct csched2_vcpu *svc,
+ int *pos)
+{
+ struct csched2_vcpu *entry = NULL;
+ struct rb_node **node = &root->rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*node) {
+ parent = *node;
+ entry = rb_entry(parent, struct csched2_vcpu, runq_elem);
+ // Keep the tree sorted in descending credit order, so that the
+ // left-most node (rb_first) is the highest-credit vCPU, exactly as
+ // the head of the old sorted list was
+ if ( svc->credit > entry->credit )
+ node = &parent->rb_left;
+ else
+ node = &parent->rb_right;
+
+ (*pos)++;
+ }
+ rb_link_node(&svc->runq_elem, parent, node);
+ rb_insert_color(&svc->runq_elem, root);
+}
+
/*
* Hyperthreading (SMT) support.
*
@@ -793,12 +817,12 @@ static s_time_t c2t(struct csched2_runqueue_data *rqd, s_time_t credit, struct c
static inline int vcpu_on_runq(struct csched2_vcpu *svc)
{
- return !list_empty(&svc->runq_elem);
+ return !RB_EMPTY_NODE(&svc->runq_elem);
}
-static inline struct csched2_vcpu * runq_elem(struct list_head *elem)
+static inline struct csched2_vcpu * runq_elem(struct rb_node *elem)
{
- return list_entry(elem, struct csched2_vcpu, runq_elem);
+ return rb_entry(elem, struct csched2_vcpu, runq_elem);
}
static void activate_runqueue(struct csched2_private *prv, int rqi)
@@ -812,7 +836,7 @@ static void activate_runqueue(struct csched2_private *prv, int rqi)
rqd->max_weight = 1;
rqd->id = rqi;
INIT_LIST_HEAD(&rqd->svc);
- INIT_LIST_HEAD(&rqd->runq);
+ rqd->runq = RB_ROOT;
spin_lock_init(&rqd->lock);
__cpumask_set_cpu(rqi, &prv->active_queues);
@@ -1272,9 +1296,8 @@ update_load(const struct scheduler *ops,
static void
runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc)
{
- struct list_head *iter;
unsigned int cpu = svc->vcpu->processor;
- struct list_head * runq = &c2rqd(ops, cpu)->runq;
+ struct rb_root *runq = &c2rqd(ops, cpu)->runq;
int pos = 0;
ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
@@ -1287,16 +1310,7 @@ runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc)
ASSERT(!svc->vcpu->is_running);
ASSERT(!(svc->flags & CSFLAG_scheduled));
- list_for_each( iter, runq )
- {
- struct csched2_vcpu * iter_svc = runq_elem(iter);
-
- if ( svc->credit > iter_svc->credit )
- break;
-
- pos++;
- }
- list_add_tail(&svc->runq_elem, iter);
+ runq_insert_rb(runq, svc, &pos);
if ( unlikely(tb_init_done) )
{
@@ -1315,8 +1329,11 @@ runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc)
static inline void runq_remove(struct csched2_vcpu *svc)
{
+ // TODO This assert might not be required, as we always have a check before
+ // calling this API
ASSERT(vcpu_on_runq(svc));
- list_del_init(&svc->runq_elem);
+ rb_erase(&svc->runq_elem, &svc->rqd->runq);
+ RB_CLEAR_NODE(&svc->runq_elem);
}
void burn_credits(struct csched2_runqueue_data *rqd, struct csched2_vcpu *,
s_time_t);
@@ -1785,6 +1802,7 @@ static void park_vcpu(struct csched2_vcpu *svc)
* In both cases, we also add it to the list of parked vCPUs of the domain.
*/
__set_bit(_VPF_parked, &v->pause_flags);
+
if ( vcpu_on_runq(svc) )
{
runq_remove(svc);
@@ -2037,7 +2055,7 @@ csched2_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
return NULL;
INIT_LIST_HEAD(&svc->rqd_elem);
- INIT_LIST_HEAD(&svc->runq_elem);
+ RB_CLEAR_NODE(&svc->runq_elem);
svc->sdom = dd;
svc->vcpu = vc;
@@ -2083,6 +2101,8 @@ csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
}
else if ( vcpu_on_runq(svc) )
{
+ printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
+
ASSERT(svc->rqd == c2rqd(ops, vc->processor));
update_load(ops, svc->rqd, svc, -1, NOW());
runq_remove(svc);
@@ -2110,6 +2130,7 @@ csched2_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
if ( unlikely(vcpu_on_runq(svc)) )
{
+ printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
SCHED_STAT_CRANK(vcpu_wake_onrunq);
goto out;
}
@@ -2501,6 +2522,7 @@ static void migrate(const struct scheduler *ops,
/* It's not running; just move it */
if ( vcpu_on_runq(svc) )
{
+ printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
runq_remove(svc);
update_load(ops, svc->rqd, NULL, -1, now);
on_runq = 1;
@@ -2759,8 +2781,10 @@ csched2_vcpu_migrate(
if ( unlikely(!cpumask_test_cpu(new_cpu, cpupool_domain_cpumask(d))) )
{
ASSERT(system_state == SYS_STATE_suspend);
+
if ( vcpu_on_runq(svc) )
{
+ printk(XENLOG_WARNING "%s : %d : vcpu on runq rb !\n", __func__, __LINE__);
runq_remove(svc);
update_load(ops, svc->rqd, NULL, -1, now);
}
@@ -3103,7 +3127,7 @@ csched2_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
spinlock_t *lock;
ASSERT(!is_idle_vcpu(vc));
- ASSERT(list_empty(&svc->runq_elem));
+ ASSERT(RB_EMPTY_NODE(&svc->runq_elem));
/* csched2_cpu_pick() expects the pcpu lock to be held */
lock = vcpu_schedule_lock_irq(vc);
@@ -3141,7 +3165,7 @@ csched2_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
spinlock_t *lock;
ASSERT(!is_idle_vcpu(vc));
- ASSERT(list_empty(&svc->runq_elem));
+ ASSERT(RB_EMPTY_NODE(&svc->runq_elem));
SCHED_STAT_CRANK(vcpu_remove);
@@ -3163,7 +3187,7 @@ csched2_runtime(const struct scheduler *ops, int cpu,
s_time_t time, min_time;
int rt_credit; /* Proposed runtime measured in credits */
struct csched2_runqueue_data *rqd = c2rqd(ops, cpu);
- struct list_head *runq = &rqd->runq;
+ struct rb_root *runq = &rqd->runq;
struct csched2_private *prv = csched2_priv(ops);
/*
@@ -3201,17 +3225,21 @@ csched2_runtime(const struct scheduler *ops, int cpu,
* 2) If there's someone waiting whose credit is positive,
* run until your credit ~= his.
*/
- if ( ! list_empty(runq) )
+ if ( ! RB_EMPTY_ROOT(runq) )
{
- struct csched2_vcpu *swait = runq_elem(runq->next);
+ // Find the left most element, which is the most probable candidate
+ struct rb_node *node = rb_first(runq);
+ // TODO Can we take rb_next ?
+ //struct rb_node *node = &rb_next(root->rb_node);
+
+ struct csched2_vcpu *swait = runq_elem(node);
if ( ! is_idle_vcpu(swait->vcpu)
&& swait->credit > 0 )
{
rt_credit = snext->credit - swait->credit;
}
}
-
/*
* The next guy on the runqueue may actually have a higher credit,
* if we've tried to avoid migrating him from a different cpu.
@@ -3260,7 +3288,7 @@ runq_candidate(struct csched2_runqueue_data *rqd,
int cpu, s_time_t now,
unsigned int *skipped)
{
- struct list_head *iter, *temp;
+ struct rb_node *iter = NULL;
struct csched2_vcpu *snext = NULL;
struct csched2_private *prv = csched2_priv(per_cpu(scheduler, cpu));
bool yield = false, soft_aff_preempt = false;
@@ -3345,9 +3373,8 @@ runq_candidate(struct csched2_runqueue_data *rqd,
snext = csched2_vcpu(idle_vcpu[cpu]);
check_runq:
- list_for_each_safe( iter, temp, &rqd->runq )
- {
- struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
+ for (iter = rb_first(&rqd->runq); iter != NULL; iter = rb_next(iter)) {
+ struct csched2_vcpu * svc = rb_entry(iter, struct csched2_vcpu, runq_elem);
if ( unlikely(tb_init_done) )
{
@@ -3749,7 +3776,8 @@ csched2_dump(const struct scheduler *ops)
for_each_cpu(i, &prv->active_queues)
{
struct csched2_runqueue_data *rqd = prv->rqd + i;
- struct list_head *iter, *runq = &rqd->runq;
+ struct rb_root *runq = &rqd->runq;
+ struct rb_node *iter;
int loop = 0;
/* We need the lock to scan the runqueue. */
@@ -3761,8 +3789,8 @@ csched2_dump(const struct scheduler *ops)
dump_pcpu(ops, j);
printk("RUNQ:\n");
- list_for_each( iter, runq )
- {
+
+ for (iter = rb_first(runq); iter != NULL; iter = rb_next(iter)) {
struct csched2_vcpu *svc = runq_elem(iter);
if ( svc )
--
2.13.6
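A further aside, again not part of the patch: a minimal sketch of the
off-queue convention the change relies on. With the list, list_del_init()
plus list_empty() answered "is this vCPU on a runqueue?"; with an rb-tree
the equivalent is RB_CLEAR_NODE() after rb_erase(), checked with
RB_EMPTY_NODE(), as the diff does in runq_remove() and vcpu_on_runq().
The demo_* names below are invented for the illustration.

#include <xen/types.h>
#include <xen/rbtree.h>

struct demo_vcpu {
    int credit;
    struct rb_node runq_elem;
};

/* A cleared node plays the role the self-linked list_head played before. */
static bool demo_on_runq(const struct demo_vcpu *svc)
{
    return !RB_EMPTY_NODE(&svc->runq_elem);
}

static void demo_runq_remove(struct rb_root *root, struct demo_vcpu *svc)
{
    rb_erase(&svc->runq_elem, root);    /* unlink from the tree...        */
    RB_CLEAR_NODE(&svc->runq_elem);     /* ...and mark it as off any runq */
}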