[Xen-devel] [PATCH v4] xen: sched: convert RTDS from time to event driven model
v4 is meant for discussion on the addition of replq.

Changes since v3:
    removed the running queue.
    added a replenishment queue to keep track of replenishment events
    (see the sketch below).
    the timer is now per scheduler.
    the timer is initialised on a valid cpu in a cpupool.
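As a discussion aid, here is a minimal user-space sketch of the idea
behind replq: replenishment events are kept sorted by cur_deadline, and
the single per-scheduler timer is always armed for the head of the list.
The list type and all names below are simplified stand-ins for
illustration only, not the actual Xen code (which uses list_head and
struct rt_vcpu).

    #include <stdio.h>
    #include <stdint.h>

    /* stand-in for struct rt_vcpu: only what the sketch needs */
    struct vcpu_model {
        int id;
        int64_t cur_deadline;    /* next replenishment time */
        struct vcpu_model *next; /* models the p_elem linkage */
    };

    /* insert in non-decreasing cur_deadline order, like __replq_insert() */
    static void replq_insert(struct vcpu_model **head, struct vcpu_model *v)
    {
        while ( *head && (*head)->cur_deadline < v->cur_deadline )
            head = &(*head)->next;
        v->next = *head;
        *head = v;
    }

    /* the timer is always armed for the earliest event, i.e. the head */
    static int64_t next_timer_expiry(const struct vcpu_model *head)
    {
        return head ? head->cur_deadline : -1; /* -1: nothing to program */
    }

    int main(void)
    {
        struct vcpu_model a = { 1, 300 }, b = { 2, 100 }, c = { 3, 200 };
        struct vcpu_model *replq = NULL;

        replq_insert(&replq, &a);
        replq_insert(&replq, &b);
        replq_insert(&replq, &c);

        /* prints 100: the timer fires at the earliest replenishment,
         * replenishes that vcpu, then re-arms for the new head */
        printf("arm timer for t=%lld\n", (long long)next_timer_expiry(replq));
        return 0;
    }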
Bugs to be fixed: cpupool and locks. When a pcpu is removed from one
pool and added to another, the lock equality assert in free_pdata()
fails when Pool-0 is using rtds.
This patch is based on the master branch, after commit 2e46e3
("x86/mce: fix misleading indentation in init_nonfatal_mce_checker()").
Signed-off-by: Tianyang Chen <tiche@xxxxxxxxxxxxxx>
Signed-off-by: Meng Xu <mengxu@xxxxxxxxxxxxx>
Signed-off-by: Dagaen Golomb <dgolomb@xxxxxxxxxxxxxx>
---
xen/common/sched_rt.c | 262 ++++++++++++++++++++++++++++++++++++-------------
1 file changed, 192 insertions(+), 70 deletions(-)
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 2e5430f..c36e5de 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
+#include <xen/timer.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
@@ -87,7 +88,7 @@
#define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
#define UPDATE_LIMIT_SHIFT 10
-#define MAX_SCHEDULE (MILLISECS(1))
+
/*
* Flags
*/
@@ -142,6 +143,9 @@ static cpumask_var_t *_cpumask_scratch;
*/
static unsigned int nr_rt_ops;
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
/*
* System-wide private data, including the global RunQueue/DepletedQ
* Global lock is referenced by schedule_data.schedule_lock from all
@@ -152,7 +156,9 @@ struct rt_private {
struct list_head sdom; /* list of available domains, used for dump */
struct list_head runq; /* ordered list of runnable vcpus */
struct list_head depletedq; /* unordered list of depleted vcpus */
+ struct list_head replq; /* ordered list of vcpus that need repl */
cpumask_t tickled; /* cpus been tickled */
+ struct timer *repl_timer; /* replenishment timer */
};
/*
@@ -160,6 +166,7 @@ struct rt_private {
*/
struct rt_vcpu {
struct list_head q_elem; /* on the runq/depletedq list */
+ struct list_head p_elem; /* on the repl event list */
/* Up-pointers */
struct rt_dom *sdom;
@@ -213,8 +220,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops)
return &rt_priv(ops)->depletedq;
}
+static inline struct list_head *rt_replq(const struct scheduler *ops)
+{
+ return &rt_priv(ops)->replq;
+}
+
/*
- * Queue helper functions for runq and depletedq
+ * Queue helper functions for runq, depletedq,
+ * and the replenishment event queue
*/
static int
__vcpu_on_q(const struct rt_vcpu *svc)
@@ -228,6 +241,18 @@ __q_elem(struct list_head *elem)
return list_entry(elem, struct rt_vcpu, q_elem);
}
+static struct rt_vcpu *
+__p_elem(struct list_head *elem)
+{
+ return list_entry(elem, struct rt_vcpu, p_elem);
+}
+
+static int
+__vcpu_on_p(const struct rt_vcpu *svc)
+{
+ return !list_empty(&svc->p_elem);
+}
+
/*
* Debug related code, dump vcpu/cpu information
*/
@@ -387,6 +412,13 @@ __q_remove(struct rt_vcpu *svc)
list_del_init(&svc->q_elem);
}
+static inline void
+__p_remove(struct rt_vcpu *svc)
+{
+ if ( __vcpu_on_p(svc) )
+ list_del_init(&svc->p_elem);
+}
+
/*
* Insert svc with budget in RunQ according to EDF:
* vcpus with smaller deadlines go first.
@@ -421,6 +453,32 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
}
/*
+ * Insert svc into the replenishment event list:
+ * vcpus that need to be replenished earlier go first.
+ */
+static void
+__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+ struct rt_private *prv = rt_priv(ops);
+ struct list_head *replq = rt_replq(ops);
+ struct list_head *iter;
+
+ ASSERT( spin_is_locked(&prv->lock) );
+
+ ASSERT( !__vcpu_on_p(svc) );
+
+ list_for_each(iter, replq)
+ {
+ struct rt_vcpu * iter_svc = __p_elem(iter);
+ if ( svc->cur_deadline <= iter_svc->cur_deadline )
+ break;
+ }
+
+ list_add_tail(&svc->p_elem, iter);
+}
+
+
+/*
* Init/Free related code
*/
static int
@@ -449,6 +507,7 @@ rt_init(struct scheduler *ops)
INIT_LIST_HEAD(&prv->sdom);
INIT_LIST_HEAD(&prv->runq);
INIT_LIST_HEAD(&prv->depletedq);
+ INIT_LIST_HEAD(&prv->replq);
cpumask_clear(&prv->tickled);
@@ -473,6 +532,9 @@ rt_deinit(const struct scheduler *ops)
xfree(_cpumask_scratch);
_cpumask_scratch = NULL;
}
+
+ kill_timer(prv->repl_timer);
+ xfree(prv->repl_timer);
+
xfree(prv);
}
@@ -586,6 +648,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
return NULL;
INIT_LIST_HEAD(&svc->q_elem);
+ INIT_LIST_HEAD(&svc->p_elem);
svc->flags = 0U;
svc->sdom = dd;
svc->vcpu = vc;
@@ -618,6 +681,10 @@ static void
rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
{
struct rt_vcpu *svc = rt_vcpu(vc);
+ struct rt_private *prv = rt_priv(ops);
+ struct timer *repl_timer;
+ int cpu;
+
s_time_t now = NOW();
spinlock_t *lock;
@@ -632,6 +699,17 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
vcpu_schedule_unlock_irq(lock, vc);
SCHED_STAT_CRANK(vcpu_insert);
+
+ if ( prv->repl_timer == NULL )
+ {
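+ /* Lazy allocation: the timer is set up here, at the first vcpu
+ * insertion, instead of in rt_init(), because at init time the
+ * scheduler may not yet own a valid cpu in its cpupool to bind
+ * the timer to (see the cover letter).
+ */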
+ /* vc->processor has been set in schedule.c */
+ cpu = vc->processor;
+
+ repl_timer = xzalloc(struct timer);
+
+ prv->repl_timer = repl_timer;
+ init_timer(repl_timer, repl_handler, (void *)ops, cpu);
+ }
}
/*
@@ -785,44 +863,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask)
}
/*
- * Update vcpu's budget and
- * sort runq by insert the modifed vcpu back to runq
- * lock is grabbed before calling this function
- */
-static void
-__repl_update(const struct scheduler *ops, s_time_t now)
-{
- struct list_head *runq = rt_runq(ops);
- struct list_head *depletedq = rt_depletedq(ops);
- struct list_head *iter;
- struct list_head *tmp;
- struct rt_vcpu *svc = NULL;
-
- list_for_each_safe(iter, tmp, runq)
- {
- svc = __q_elem(iter);
- if ( now < svc->cur_deadline )
- break;
-
- rt_update_deadline(now, svc);
- /* reinsert the vcpu if its deadline is updated */
- __q_remove(svc);
- __runq_insert(ops, svc);
- }
-
- list_for_each_safe(iter, tmp, depletedq)
- {
- svc = __q_elem(iter);
- if ( now >= svc->cur_deadline )
- {
- rt_update_deadline(now, svc);
- __q_remove(svc); /* remove from depleted queue */
- __runq_insert(ops, svc); /* add to runq */
- }
- }
-}
-
-/*
* schedule function for rt scheduler.
* The lock is already grabbed in schedule.c, no need to lock here
*/
@@ -841,7 +881,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
/* burn_budget would return for IDLE VCPU */
burn_budget(ops, scurr, now);
- __repl_update(ops, now);
if ( tasklet_work_scheduled )
{
@@ -868,6 +907,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
set_bit(__RTDS_delayed_runq_add, &scurr->flags);
snext->last_start = now;
+
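+ /* -1 means no scheduler-timer event needs programming: with the
+ * event-driven model there is no periodic MAX_SCHEDULE tick, and
+ * replenishments are driven by repl_timer instead.
+ */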
+ ret.time = -1; /* if an idle vcpu is picked */
if ( !is_idle_vcpu(snext->vcpu) )
{
if ( snext != scurr )
@@ -880,9 +921,11 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
snext->vcpu->processor = cpu;
ret.migrated = 1;
}
+
+ ret.time = snext->budget; /* invoke the scheduler next time */
+
}
- ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
ret.task = snext->vcpu;
/* TRACE */
@@ -924,6 +967,10 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
__q_remove(svc);
else if ( svc->flags & RTDS_delayed_runq_add )
clear_bit(__RTDS_delayed_runq_add, &svc->flags);
+
+ /* stop tracking the replenishment time of this vcpu */
+ if ( __vcpu_on_p(svc) )
+ __p_remove(svc);
}
/*
@@ -1027,23 +1074,21 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
struct rt_vcpu * const svc = rt_vcpu(vc);
s_time_t now = NOW();
struct rt_private *prv = rt_priv(ops);
- struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
- struct rt_dom *sdom = NULL;
- cpumask_t *online;
+ struct timer *repl_timer = prv->repl_timer;
BUG_ON( is_idle_vcpu(vc) );
if ( unlikely(curr_on_cpu(vc->processor) == vc) )
{
SCHED_STAT_CRANK(vcpu_wake_running);
- return;
+ goto out;
}
/* on RunQ/DepletedQ, just update info is ok */
if ( unlikely(__vcpu_on_q(svc)) )
{
SCHED_STAT_CRANK(vcpu_wake_onrunq);
- return;
+ goto out;
}
if ( likely(vcpu_runnable(vc)) )
@@ -1058,25 +1103,39 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
if ( unlikely(svc->flags & RTDS_scheduled) )
{
set_bit(__RTDS_delayed_runq_add, &svc->flags);
- return;
+ goto out;
}
+ /* A budget replenishment may be needed here, before inserting
+ * the vcpu back into the runq. When that happens, the vcpu must
+ * also be taken off the replq, or the timer handler may find no
+ * vcpu to replenish when it fires, the replenishment having
+ * already been done here.
+ */
if ( now >= svc->cur_deadline)
+ {
rt_update_deadline(now, svc);
+ if ( __vcpu_on_p(svc) )
+ __p_remove(svc);
+ }
/* insert svc to runq/depletedq because svc is not in queue now */
__runq_insert(ops, svc);
- __repl_update(ops, now);
-
- ASSERT(!list_empty(&prv->sdom));
- sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
- online = cpupool_domain_cpumask(sdom->dom);
- snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */
-
- runq_tickle(ops, snext);
+ runq_tickle(ops, svc);
- return;
+out:
+ /* a newly woken-up vcpu could have an earlier release time,
+ * or it could be the first one to program the timer
+ */
+ if ( repl_timer->expires == 0 || repl_timer->expires > svc->cur_deadline )
+ set_timer(repl_timer, svc->cur_deadline);
+
+ /* start tracking the replenishment time of this vcpu here:
+ * it stays on the replq unless it goes to sleep
+ * or is marked as not runnable
+ */
+ if ( !__vcpu_on_p(svc) )
+ __replq_insert(ops, svc);
}
/*
@@ -1087,10 +1146,7 @@ static void
rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
{
struct rt_vcpu *svc = rt_vcpu(vc);
- struct rt_vcpu *snext = NULL;
- struct rt_dom *sdom = NULL;
- struct rt_private *prv = rt_priv(ops);
- cpumask_t *online;
+
spinlock_t *lock = vcpu_schedule_lock_irq(vc);
clear_bit(__RTDS_scheduled, &svc->flags);
@@ -1100,17 +1156,9 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
if ( test_and_clear_bit(__RTDS_delayed_runq_add, &svc->flags) &&
likely(vcpu_runnable(vc)) )
- {
- __runq_insert(ops, svc);
- __repl_update(ops, NOW());
- ASSERT(!list_empty(&prv->sdom));
- sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
- online = cpupool_domain_cpumask(sdom->dom);
- snext = __runq_pick(ops, online); /* pick snext from ALL cpus */
+ __runq_insert(ops, svc);
- runq_tickle(ops, snext);
- }
out:
vcpu_schedule_unlock_irq(lock, vc);
}
@@ -1168,6 +1216,80 @@ rt_dom_cntl(
return rc;
}
+/* The replenishment timer handler picks vcpus
+ * from the replq and does the actual replenishment.
+ * The replq only keeps track of runnable vcpus.
+ */
+static void repl_handler(void *data)
+{
+ unsigned long flags;
+ s_time_t now = NOW();
+ s_time_t t_next = LONG_MAX; /* next time the timer should fire */
+ struct scheduler *ops = data;
+ struct rt_private *prv = rt_priv(ops);
+ struct list_head *replq = rt_replq(ops);
+ struct timer *repl_timer = prv->repl_timer;
+ struct list_head *iter, *tmp;
+ struct rt_vcpu *svc = NULL;
+
+ stop_timer(repl_timer);
+
+ spin_lock_irqsave(&prv->lock, flags);
+
+ list_for_each_safe(iter, tmp, replq)
+ {
+ svc = __p_elem(iter);
+
+ if ( now >= svc->cur_deadline )
+ {
+ rt_update_deadline(now, svc);
+
+ if ( t_next > svc->cur_deadline )
+ t_next = svc->cur_deadline;
+
+ /* when the replenishment happens,
+ * svc is either running on a pcpu
+ * or on the runq/depletedq
+ */
+ if ( __vcpu_on_q(svc) )
+ {
+ /* put back to runq */
+ __q_remove(svc);
+ __runq_insert(ops, svc);
+ runq_tickle(ops, svc);
+ }
+
+ /* re-sort the replq: keep tracking this
+ * vcpu only if it is still runnable. If
+ * at this point no vcpus are runnable,
+ * the timer waits for wake() to
+ * program it again.
+ */
+ __p_remove(svc);
+
+ if ( vcpu_runnable(svc->vcpu) )
+ __replq_insert(ops, svc);
+ }
+
+ else
+ {
+ /* Break out of the loop if this vcpu's
+ * cur_deadline is later than now.
+ * The code can also end up here when some
+ * replenishment was already done in wake()
+ * and the timer fires earlier than it should.
+ */
+ if ( t_next > svc->cur_deadline )
+ t_next = svc->cur_deadline;
+ break;
+ }
+ }
+
+ set_timer(repl_timer, t_next);
+
+ spin_unlock_irqrestore(&prv->lock, flags);
+}
+
static struct rt_private _rt_priv;
static const struct scheduler sched_rtds_def = {
--
1.7.9.5