Re: [Xen-devel] [PATCH] xen/sched_credit: Use delay to control scheduling frequency
Ping?

On Tue, Jan 10, 2012 at 10:05 AM, George Dunlap
<George.Dunlap@xxxxxxxxxxxxx> wrote:
> On Mon, Jan 9, 2012 at 10:22 AM, Hui Lv <hui.lv@xxxxxxxxx> wrote:
>> Updated the warning sentence for ratelimit_us.
>>
>> This patch can improve Xen performance:
>> 1. The "delay method" achieves an 11% overall performance boost for
>> SPECvirt compared to the original credit scheduler.
>> 2. We have tried 1ms and 10ms delays; there is no big difference between
>> the two configurations (1ms is enough to achieve good performance).
>> 3. We have compared response time/latency at different load levels (low,
>> high, peak); the "delay method" did not increase response time by much.
>> 4. A 1ms delay reduces context switches by 30% at peak performance, which
>> is where the benefit comes from (sched_ratelimit_us = 1000 is the
>> recommended setting).
>>
>> Signed-off-by: Hui Lv <hui.lv@xxxxxxxxx>
>> Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
>>
>> Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
>
> Just confirming this:
> Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
>
> Thanks, Hui.
> -George
>
>>
>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/common/sched_credit.c
>> --- a/xen/common/sched_credit.c Fri Dec 16 18:46:27 2011 +0000
>> +++ b/xen/common/sched_credit.c Mon Jan 09 05:21:35 2012 -0500
>> @@ -172,6 +172,7 @@ struct csched_private {
>>      uint32_t credit;
>>      int credit_balance;
>>      uint32_t runq_sort;
>> +    unsigned ratelimit_us;
>>      /* Period of master and tick in milliseconds */
>>      unsigned tslice_ms, tick_period_us, ticks_per_tslice;
>>      unsigned credits_per_tslice;
>> @@ -1297,10 +1298,15 @@ csched_schedule(
>>      struct csched_private *prv = CSCHED_PRIV(ops);
>>      struct csched_vcpu *snext;
>>      struct task_slice ret;
>> +    s_time_t runtime, tslice;
>>
>>      CSCHED_STAT_CRANK(schedule);
>>      CSCHED_VCPU_CHECK(current);
>>
>> +    runtime = now - current->runstate.state_entry_time;
>> +    if ( runtime < 0 ) /* Does this ever happen? */
>> +        runtime = 0;
>> +
>>      if ( !is_idle_vcpu(scurr->vcpu) )
>>      {
>>          /* Update credits of a non-idle VCPU. */
>> @@ -1313,6 +1319,35 @@ csched_schedule(
>>          scurr->pri = CSCHED_PRI_IDLE;
>>      }
>>
>> +    /* Choices, choices:
>> +     * - If we have a tasklet, we need to run the idle vcpu no matter what.
>> +     * - If sched rate limiting is in effect, and the current vcpu has
>> +     *   run for less than that amount of time, continue the current one,
>> +     *   but with a shorter timeslice and return it immediately
>> +     * - Otherwise, choose the one with the highest priority (which may
>> +     *   be the one currently running)
>> +     * - If the currently running one is TS_OVER, see if there
>> +     *   is a higher priority one waiting on the runqueue of another
>> +     *   cpu and steal it.
>> +     */
>> +
>> +    /* If we have schedule rate limiting enabled, check to see
>> +     * how long we've run for. */
>> +    if ( !tasklet_work_scheduled
>> +         && prv->ratelimit_us
>> +         && vcpu_runnable(current)
>> +         && !is_idle_vcpu(current)
>> +         && runtime < MICROSECS(prv->ratelimit_us) )
>> +    {
>> +        snext = scurr;
>> +        snext->start_time += now;
>> +        perfc_incr(delay_ms);
>> +        tslice = MICROSECS(prv->ratelimit_us);
>> +        ret.migrated = 0;
>> +        goto out;
>> +    }
>> +    tslice = MILLISECS(prv->tslice_ms);
>> +
>>      /*
>>       * Select next runnable local VCPU (ie top of local runq)
>>       */
>> @@ -1367,11 +1402,12 @@ csched_schedule(
>>      if ( !is_idle_vcpu(snext->vcpu) )
>>          snext->start_time += now;
>>
>> +out:
>>      /*
>>       * Return task to run next...
>>       */
>>      ret.time = (is_idle_vcpu(snext->vcpu) ?
>> -                -1 : MILLISECS(prv->tslice_ms));
>> +                -1 : tslice);
>>      ret.task = snext->vcpu;
>>
>>      CSCHED_VCPU_CHECK(ret.task);
>> @@ -1533,6 +1569,15 @@ csched_init(struct scheduler *ops)
>>      prv->tick_period_us = prv->tslice_ms * 1000 / prv->ticks_per_tslice;
>>      prv->credits_per_tslice = CSCHED_CREDITS_PER_MSEC * prv->tslice_ms;
>>
>> +    if ( MICROSECS(sched_ratelimit_us) > MILLISECS(sched_credit_tslice_ms) )
>> +    {
>> +        printk("WARNING: sched_ratelimit_us > "
>> +               "sched_credit_tslice_ms is undefined\n"
>> +               "Setting ratelimit_us to 1000 * tslice_ms\n");
>> +        prv->ratelimit_us = 1000 * prv->tslice_ms;
>> +    }
>> +    else
>> +        prv->ratelimit_us = sched_ratelimit_us;
>>      return 0;
>>  }
>>
>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/common/schedule.c
>> --- a/xen/common/schedule.c Fri Dec 16 18:46:27 2011 +0000
>> +++ b/xen/common/schedule.c Mon Jan 09 05:21:35 2012 -0500
>> @@ -47,6 +47,11 @@ string_param("sched", opt_sched);
>>  bool_t sched_smt_power_savings = 0;
>>  boolean_param("sched_smt_power_savings", sched_smt_power_savings);
>>
>> +/* Default scheduling rate limit: 1ms
>> + * The behavior when sched_ratelimit_us is greater than sched_credit_tslice_ms is undefined
>> + * */
>> +int sched_ratelimit_us = 1000;
>> +integer_param("sched_ratelimit_us", sched_ratelimit_us);
>>  /* Various timer handlers. */
>>  static void s_timer_fn(void *unused);
>>  static void vcpu_periodic_timer_fn(void *data);
>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/include/xen/perfc_defn.h
>> --- a/xen/include/xen/perfc_defn.h Fri Dec 16 18:46:27 2011 +0000
>> +++ b/xen/include/xen/perfc_defn.h Mon Jan 09 05:21:35 2012 -0500
>> @@ -16,6 +16,7 @@ PERFCOUNTER(sched_irq, "sch
>>  PERFCOUNTER(sched_run, "sched: runs through scheduler")
>>  PERFCOUNTER(sched_ctx, "sched: context switches")
>>
>> +PERFCOUNTER(delay_ms, "csched: delay")
>>  PERFCOUNTER(vcpu_check, "csched: vcpu_check")
>>  PERFCOUNTER(schedule, "csched: schedule")
>>  PERFCOUNTER(acct_run, "csched: acct_run")
>> diff -r a4bff36780a3 -r fe8d0ca867aa xen/include/xen/sched-if.h
>> --- a/xen/include/xen/sched-if.h Fri Dec 16 18:46:27 2011 +0000
>> +++ b/xen/include/xen/sched-if.h Mon Jan 09 05:21:35 2012 -0500
>> @@ -16,6 +16,11 @@ extern struct cpupool *cpupool0;
>>  /* cpus currently in no cpupool */
>>  extern cpumask_t cpupool_free_cpus;
>>
>> +/* Scheduler generic parameters
>> + * */
>> +extern int sched_ratelimit_us;
>> +
>> +
>>  /*
>>   * In order to allow a scheduler to remap the lock->cpu mapping,
>>   * we have a per-cpu pointer, along with a pre-allocated set of

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
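As a quick illustration for readers of the archive, here is a minimal standalone sketch of the rate-limiting decision the patch adds to csched_schedule(). It is plain C rather than Xen code: the helper name should_keep_current() and its flattened parameters are hypothetical stand-ins for the state the scheduler reads from scurr, prv and tasklet_work_scheduled.

/*
 * Minimal standalone sketch of the rate-limiting decision above.
 * Plain C, not Xen code: should_keep_current() and its flattened
 * parameters are hypothetical stand-ins for what csched_schedule()
 * reads from scurr, prv and tasklet_work_scheduled.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same convention as Xen: scheduler time values are in nanoseconds. */
#define MICROSECS(us) ((int64_t)(us) * 1000LL)
#define MILLISECS(ms) ((int64_t)(ms) * 1000000LL)

/*
 * Decide whether the currently running vCPU keeps the pCPU.  On return,
 * *tslice_ns holds the timeslice to program next: the short rate-limit
 * window if the current vCPU is kept, otherwise the normal timeslice.
 */
static bool should_keep_current(bool tasklet_work_scheduled, bool runnable,
                                bool is_idle, int64_t runtime_ns,
                                unsigned int ratelimit_us,
                                unsigned int tslice_ms, int64_t *tslice_ns)
{
    if ( !tasklet_work_scheduled            /* tasklets force the idle vcpu */
         && ratelimit_us                    /* 0 means rate limiting is off */
         && runnable && !is_idle            /* a real, still-runnable vCPU  */
         && runtime_ns < MICROSECS(ratelimit_us) )
    {
        *tslice_ns = MICROSECS(ratelimit_us);
        return true;
    }
    *tslice_ns = MILLISECS(tslice_ms);
    return false;
}

int main(void)
{
    int64_t tslice;
    /* A vCPU that has run for only 200us against a 1000us limit is kept. */
    bool keep = should_keep_current(false, true, false, MICROSECS(200),
                                    1000, 30, &tslice);
    printf("keep current vCPU: %s, next timeslice: %lld ns\n",
           keep ? "yes" : "no", (long long)tslice);
    return 0;
}

The limit itself comes from the sched_ratelimit_us hypervisor command-line parameter added in xen/common/schedule.c (default 1000, i.e. 1ms, the recommended setting above); because the patch gates the check on prv->ratelimit_us being non-zero, setting the parameter to 0 disables rate limiting entirely.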