Xen project Mailing List

[Xen-devel] [RFC PATCH 4/4] Changed filenames with sedf to cbs to reflect the actual scheduler

From: Robbie VanVossen <robert.vanvossen@xxxxxxxxxxxxxxx> --- tools/libxc/Makefile | 2 +- tools/libxc/xc_cbs.c | 70 ++++ tools/libxc/xc_sedf.c | 70 ---- xen/common/Makefile | 2 +- xen/common/sched_cbs.c | 917 +++++++++++++++++++++++++++++++++++++++++++++++ xen/common/sched_sedf.c | 917 ----------------------------------------------- 6 files changed, 989 insertions(+), 989 deletions(-) create mode 100755 tools/libxc/xc_cbs.c delete mode 100755 tools/libxc/xc_sedf.c create mode 100644 xen/common/sched_cbs.c delete mode 100644 xen/common/sched_sedf.c diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile index a74b19e..dc06e59 100644 --- a/tools/libxc/Makefile +++ b/tools/libxc/Makefile @@ -16,7 +16,7 @@ CTRL_SRCS-y += xc_misc.c CTRL_SRCS-y += xc_flask.c CTRL_SRCS-y += xc_physdev.c CTRL_SRCS-y += xc_private.c -CTRL_SRCS-y += xc_sedf.c +CTRL_SRCS-y += xc_cbs.c CTRL_SRCS-y += xc_csched.c CTRL_SRCS-y += xc_csched2.c CTRL_SRCS-y += xc_arinc653.c diff --git a/tools/libxc/xc_cbs.c b/tools/libxc/xc_cbs.c new file mode 100755 index 0000000..3b578d1 --- /dev/null +++ b/tools/libxc/xc_cbs.c @@ -0,0 +1,70 @@ +/****************************************************************************** + * xc_cbs.c + * + * API for manipulating parameters of the CBS scheduler. + * + * changes by Stephan Diestelhorst + * based on code + * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "xc_private.h" + +int xc_cbs_domain_set( + xc_interface *xch, + uint32_t domid, + uint64_t period, + uint64_t budget, + uint16_t soft) +{ + DECLARE_DOMCTL; + struct xen_domctl_sched_cbs *p = &domctl.u.scheduler_op.u.cbs; + + domctl.cmd = XEN_DOMCTL_scheduler_op; + domctl.domain = (domid_t)domid; + domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CBS; + domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_putinfo; + + p->period = period; + p->budget = budget; + p->soft = soft; + return do_domctl(xch, &domctl); +} + +int xc_cbs_domain_get( + xc_interface *xch, + uint32_t domid, + uint64_t *period, + uint64_t *budget, + uint16_t *soft) +{ + DECLARE_DOMCTL; + int ret; + struct xen_domctl_sched_cbs *p = &domctl.u.scheduler_op.u.cbs; + + domctl.cmd = XEN_DOMCTL_scheduler_op; + domctl.domain = (domid_t)domid; + domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CBS; + domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_getinfo; + + ret = do_domctl(xch, &domctl); + + *period = p->period; + *budget = p->budget; + *soft = p->soft; + return ret; +} diff --git a/tools/libxc/xc_sedf.c b/tools/libxc/xc_sedf.c deleted file mode 100755 index 3b578d1..0000000 --- a/tools/libxc/xc_sedf.c +++ /dev/null @@ -1,70 +0,0 @@ -/****************************************************************************** - * xc_cbs.c - * - * API for manipulating parameters of the CBS scheduler. - * - * changes by Stephan Diestelhorst - * based on code - * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "xc_private.h" - -int xc_cbs_domain_set( - xc_interface *xch, - uint32_t domid, - uint64_t period, - uint64_t budget, - uint16_t soft) -{ - DECLARE_DOMCTL; - struct xen_domctl_sched_cbs *p = &domctl.u.scheduler_op.u.cbs; - - domctl.cmd = XEN_DOMCTL_scheduler_op; - domctl.domain = (domid_t)domid; - domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CBS; - domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_putinfo; - - p->period = period; - p->budget = budget; - p->soft = soft; - return do_domctl(xch, &domctl); -} - -int xc_cbs_domain_get( - xc_interface *xch, - uint32_t domid, - uint64_t *period, - uint64_t *budget, - uint16_t *soft) -{ - DECLARE_DOMCTL; - int ret; - struct xen_domctl_sched_cbs *p = &domctl.u.scheduler_op.u.cbs; - - domctl.cmd = XEN_DOMCTL_scheduler_op; - domctl.domain = (domid_t)domid; - domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_CBS; - domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_getinfo; - - ret = do_domctl(xch, &domctl); - - *period = p->period; - *budget = p->budget; - *soft = p->soft; - return ret; -} diff --git a/xen/common/Makefile b/xen/common/Makefile index 3683ae3..f2cb709 100644 --- a/xen/common/Makefile +++ b/xen/common/Makefile @@ -24,7 +24,7 @@ obj-y += random.o obj-y += rangeset.o obj-y += sched_credit.o obj-y += sched_credit2.o -obj-y += sched_sedf.o +obj-y += sched_cbs.o obj-y += sched_arinc653.o obj-y += schedule.o obj-y += shutdown.o diff --git a/xen/common/sched_cbs.c b/xen/common/sched_cbs.c new file mode 100644 index 0000000..5df4825 
--- /dev/null +++ b/xen/common/sched_cbs.c @@ -0,0 +1,917 @@ +/****************************************************************************** + * Constant Bandwidth Server Scheduler for Xen + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * by DornerWorks Ltd. 
(C) 2014 Grand Rapids, MI + * + * Adapted from code by Stephan Diestelhorst (C) 2004 Cambridge University + * and Mark Williamson (C) 2004 Intel Research Cambridge + * + */ + +#include <xen/lib.h> +#include <xen/sched.h> +#include <xen/sched-if.h> +#include <xen/timer.h> +#include <xen/softirq.h> +#include <xen/time.h> +#include <xen/errno.h> + +#ifndef NDEBUG +#define CBS_STATS +#define CHECK(_p) \ + do { \ + if ( !(_p) ) \ + printk("Check '%s' failed, line %d, file %s\n", \ + #_p , __LINE__, __FILE__); \ + } while ( 0 ) +#else +#define CHECK(_p) ((void)0) +#endif + +#define CBS_SOFT_TASK (1) +#define CBS_ASLEEP (16) + +#define DEFAULT_PERIOD (MILLISECS(20)) +#define DEFAULT_BUDGET (MILLISECS(10)) + +#define PERIOD_MAX MILLISECS(10000) /* 10s */ +#define PERIOD_MIN (MICROSECS(10)) /* 10us */ +#define BUDGET_MIN (MICROSECS(5)) /* 5us */ + +#define EQ(_A, _B) ((!!(_A)) == (!!(_B))) + + +struct cbs_dom_info { + struct domain *domain; +}; + +struct cbs_priv_info { + /* lock for the whole pluggable scheduler, nests inside cpupool_lock */ + spinlock_t lock; +}; + +struct cbs_vcpu_info { + struct vcpu *vcpu; + struct list_head list; + + + /* Parameters for EDF-CBS */ + s_time_t period; /* = Server scheduling period */ + s_time_t budget; /* = Guarenteed minimum CPU time per period */ + /* Note: Server bandwidth = (budget / period) */ + + /* Status of vcpu */ + int status; + /* Bookkeeping */ + s_time_t deadl_abs; + s_time_t sched_start_abs; + s_time_t cputime; + /* Times the vcpu un-/blocked */ + s_time_t block_abs; + s_time_t unblock_abs; + +#ifdef CBS_STATS + s_time_t block_time_tot; + int block_tot; + int short_block_tot; + int long_block_tot; + s_time_t miss_time; + s_time_t over_time; + int miss_tot; + int over_tot; +#endif +}; + +struct cbs_cpu_info { + struct list_head runnableq; + struct list_head waitq; + s_time_t current_budget_expires; +}; + +#define CBS_PRIV(_ops) \ + ((struct cbs_priv_info *)((_ops)->sched_data)) +#define CBS_VCPU(_vcpu) ((struct 
cbs_vcpu_info *)((_vcpu)->sched_priv)) +#define CBS_PCPU(_cpu) \ + ((struct cbs_cpu_info *)per_cpu(schedule_data, _cpu).sched_priv) +#define LIST(_vcpu) (&CBS_VCPU(_vcpu)->list) +#define RUNQ(_cpu) (&CBS_PCPU(_cpu)->runnableq) +#define WAITQ(_cpu) (&CBS_PCPU(_cpu)->waitq) +#define IDLETASK(_cpu) (idle_vcpu[_cpu]) + +#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period) + +#define DIV_UP(_X, _Y) (((_X) + (_Y) - 1) / _Y) + +#define cbs_runnable(edom) (!(CBS_VCPU(edom)->status & CBS_ASLEEP)) + +#define cbs_soft(edom) (CBS_VCPU(edom)->status & CBS_SOFT_TASK) + +static void cbs_dump_cpu_state(const struct scheduler *ops, int cpu); + +static inline int __task_on_queue(struct vcpu *v) +{ + return (((LIST(v))->next != NULL) && (LIST(v)->next != LIST(v))); +} + +static inline void __del_from_queue(struct vcpu *v) +{ + struct list_head *list = LIST(v); + ASSERT(__task_on_queue(v)); + list_del(list); + list->next = NULL; + ASSERT(!__task_on_queue(v)); +} + +typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2); + +static inline void list_insert_sort( + struct list_head *list, struct list_head *element, list_comparer comp) +{ + struct list_head *cur; + + /* Iterate through all elements to find our "hole" */ + list_for_each( cur, list ) + if ( comp(element, cur) < 0 ) + break; + + /* cur now contains the element, before which we'll enqueue */ + list_add(element, cur->prev); +} + +#define VCPU_COMPARER(name, field, comp1, comp2) \ +static int name##_comp(struct list_head* el1, struct list_head* el2) \ +{ \ + struct cbs_vcpu_info *v1, *v2; \ + v1 = list_entry(el1, struct cbs_vcpu_info, field); \ + v2 = list_entry(el2, struct cbs_vcpu_info, field); \ + if ( (comp1) == (comp2) ) \ + return 0; \ + if ( (comp1) < (comp2) ) \ + return -1; \ + else \ + return 1; \ +} + +/* + * Adds a vcpu to the queue of processes which wait for the beginning of the + * next period; this list is therefore sortet by this time, which is simply + * absol. deadline - period. 
+ */ +VCPU_COMPARER(waitq, list, PERIOD_BEGIN(v1), PERIOD_BEGIN(v2)); +static inline void __add_to_waitqueue_sort(struct vcpu *v) +{ + ASSERT(!__task_on_queue(v)); + list_insert_sort(WAITQ(v->processor), LIST(v), waitq_comp); + ASSERT(__task_on_queue(v)); +} + +/* + * Adds a vcpu to the queue of processes which have started their current + * period and are runnable (i.e. not blocked, dieing,...). The first element + * on this list is running on the processor, if the list is empty the idle + * task will run. As we are implementing EDF, this list is sorted by deadlines. + */ +VCPU_COMPARER(runq, list, v1->deadl_abs, v2->deadl_abs); +static inline void __add_to_runqueue_sort(struct vcpu *v) +{ + list_insert_sort(RUNQ(v->processor), LIST(v), runq_comp); +} + + +static void cbs_insert_vcpu(const struct scheduler *ops, struct vcpu *v) +{ + if ( is_idle_vcpu(v) ) + { + CBS_VCPU(v)->deadl_abs = 0; + CBS_VCPU(v)->status &= ~CBS_ASLEEP; + } +} + +static void *cbs_alloc_vdata(const struct scheduler *ops, struct vcpu *v, void *dd) +{ + struct cbs_vcpu_info *inf; + + inf = xzalloc(struct cbs_vcpu_info); + if ( inf == NULL ) + return NULL; + + inf->vcpu = v; + + inf->deadl_abs = 0; + inf->cputime = 0; + inf->status = CBS_ASLEEP; + + if (v->domain->domain_id == 0) + { + /* Domain 0, needs a budget to boot the machine */ + inf->period = DEFAULT_PERIOD; + inf->budget = DEFAULT_BUDGET; + } + else + { + inf->period = DEFAULT_PERIOD; + inf->budget = 0; + } + + INIT_LIST_HEAD(&(inf->list)); + + SCHED_STAT_CRANK(vcpu_init); + + return inf; +} + +static void * +cbs_alloc_pdata(const struct scheduler *ops, int cpu) +{ + struct cbs_cpu_info *spc; + + spc = xzalloc(struct cbs_cpu_info); + BUG_ON(spc == NULL); + INIT_LIST_HEAD(&spc->waitq); + INIT_LIST_HEAD(&spc->runnableq); + + return (void *)spc; +} + +static void +cbs_free_pdata(const struct scheduler *ops, void *spc, int cpu) +{ + if ( spc == NULL ) + return; + + xfree(spc); +} + +static void cbs_free_vdata(const struct scheduler *ops, 
void *priv) +{ + xfree(priv); +} + +static void * +cbs_alloc_domdata(const struct scheduler *ops, struct domain *d) +{ + return xzalloc(struct cbs_dom_info); +} + +static int cbs_init_domain(const struct scheduler *ops, struct domain *d) +{ + d->sched_priv = cbs_alloc_domdata(ops, d); + if ( d->sched_priv == NULL ) + return -ENOMEM; + + return 0; +} + +static void cbs_free_domdata(const struct scheduler *ops, void *data) +{ + xfree(data); +} + +static void cbs_destroy_domain(const struct scheduler *ops, struct domain *d) +{ + cbs_free_domdata(ops, d->sched_priv); +} + +static int cbs_pick_cpu(const struct scheduler *ops, struct vcpu *v) +{ + cpumask_t online_affinity; + cpumask_t *online; + + online = cpupool_scheduler_cpumask(v->domain->cpupool); + cpumask_and(&online_affinity, v->cpu_affinity, online); + return cpumask_cycle(v->vcpu_id % cpumask_weight(&online_affinity) - 1, + &online_affinity); +} + +/* + * Handles the rescheduling & bookkeeping of vcpus running in their + * guaranteed time budget. 
+ */ +static void desched_edf_vcpu(s_time_t now, struct vcpu *v) +{ + struct cbs_vcpu_info* inf = CBS_VCPU(v); + + /* Current vcpu is running in real time mode */ + ASSERT(__task_on_queue(v)); + + /* Update the vcpu's cputime */ + inf->cputime += now - inf->sched_start_abs; + + /* Scheduling decisions which don't remove the running vcpu from + * the runq */ + if ( (inf->cputime < inf->budget) && cbs_runnable(v) ) + return; + + __del_from_queue(v); + +#ifdef CBS_STATS + /* Manage deadline misses */ + if ( unlikely(inf->deadl_abs < now) ) + { + inf->miss_tot++; + inf->miss_time += inf->cputime; + } +#endif + + /* Manage overruns */ + if ( inf->cputime >= inf->budget ) + { + inf->cputime -= inf->budget; + + + /* Set next deadline */ + inf->deadl_abs += inf->period; + + /* Ensure that the cputime is always less than budget */ + if ( unlikely(inf->cputime > inf->budget) ) + { +#ifdef CBS_STATS + inf->over_tot++; + inf->over_time += inf->cputime; +#endif + + /* Make up for the overage by pushing the deadline + into the future */ + inf->deadl_abs += ((inf->cputime / inf->budget) + * inf->period) * 2; + inf->cputime -= (inf->cputime / inf->budget) * inf->budget; + } + + /* Ensure that the start of the next period is in the future */ + if ( unlikely(PERIOD_BEGIN(inf) < now) ) + inf->deadl_abs += + (DIV_UP(now - PERIOD_BEGIN(inf), + inf->period)) * inf->period; + } + + /* Add a runnable vcpu to the appropriate queue */ + if ( cbs_runnable(v) ) + { + if( cbs_soft(v) ) + { + __add_to_runqueue_sort(v); + } + else + { + __add_to_waitqueue_sort(v); + } + } + + ASSERT(EQ(cbs_runnable(v), __task_on_queue(v))); +} + + +/* Update all elements on the queues */ +static void update_queues( + s_time_t now, struct list_head *runq, struct list_head *waitq) +{ + struct list_head *cur, *tmp; + struct cbs_vcpu_info *curinf; + + /* + * Check for the first elements of the waitqueue, whether their + * next period has already started. 
+ */ + list_for_each_safe ( cur, tmp, waitq ) + { + curinf = list_entry(cur, struct cbs_vcpu_info, list); + if ( PERIOD_BEGIN(curinf) > now ) + break; + __del_from_queue(curinf->vcpu); + __add_to_runqueue_sort(curinf->vcpu); + } + + /* Process the runq, find vcpus that are on the runq that shouldn't */ + list_for_each_safe ( cur, tmp, runq ) + { + curinf = list_entry(cur, struct cbs_vcpu_info, list); + + if ( unlikely(curinf->budget == 0) ) + { + /* Ignore vcpus with empty budget */ + __del_from_queue(curinf->vcpu); + + /* Move them to their next period */ + curinf->deadl_abs += curinf->period; + + /* Ensure that the start of the next period is in the future */ + if ( unlikely(PERIOD_BEGIN(curinf) < now) ) + curinf->deadl_abs += + (DIV_UP(now - PERIOD_BEGIN(curinf), + curinf->period)) * curinf->period; + + /* Put them back into the queue */ + __add_to_waitqueue_sort(curinf->vcpu); + } + + else + break; + } +} + + +static int cbs_init(struct scheduler *ops) +{ + struct cbs_priv_info *prv; + + prv = xzalloc(struct cbs_priv_info); + if ( prv == NULL ) + return -ENOMEM; + + ops->sched_data = prv; + spin_lock_init(&prv->lock); + + return 0; +} + + +static void cbs_deinit(const struct scheduler *ops) +{ + struct cbs_priv_info *prv; + + prv = CBS_PRIV(ops); + if ( prv != NULL ) + xfree(prv); +} + + +/* + * Main scheduling function + * Reasons for calling this function are: + * -budget for the current server is used up + * -vcpu on waitqueue has started its period + * -and various others ;) in general: determine which vcpu to run next + */ +static struct task_slice cbs_do_schedule( + const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled) +{ + int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct list_head *waitq = WAITQ(cpu); + struct cbs_vcpu_info *inf = CBS_VCPU(current); + struct cbs_vcpu_info *runinf, *waitinf; + struct task_slice ret; + + SCHED_STAT_CRANK(schedule); + + /* Idle tasks don't need any of the following stuff */ +
if ( is_idle_vcpu(current) ) + goto check_waitq; + + /* + * Create local state of the status of the vcpu, in order to avoid + * inconsistent state during scheduling decisions, because data for + * vcpu_runnable is not protected by the scheduling lock! + */ + if ( !vcpu_runnable(current) ) + inf->status |= CBS_ASLEEP; + + if ( inf->status & CBS_ASLEEP ) + inf->block_abs = now; + + desched_edf_vcpu(now, current); + check_waitq: + update_queues(now, runq, waitq); + + /* + * Now simply pick the first vcpu from the runqueue, which has the + * earliest deadline, because the list is sorted + * + * Tasklet work (which runs in idle VCPU context) overrides all else. + */ + if ( tasklet_work_scheduled || + (list_empty(runq) && list_empty(waitq)) || + unlikely(!cpumask_test_cpu(cpu, + cpupool_scheduler_cpumask(per_cpu(cpupool, cpu)))) ) + { + ret.task = IDLETASK(cpu); + ret.time = SECONDS(1); + } + else if ( !list_empty(runq) ) + { + runinf = list_entry(runq->next, struct cbs_vcpu_info, list); + ret.task = runinf->vcpu; + if ( !list_empty(waitq) ) + { + waitinf = list_entry(waitq->next, + struct cbs_vcpu_info, list); + /* + * Rerun scheduler, when scheduled vcpu consumes + * its budget or the first vcpu from the waitqueue + * gets ready. + */ + ret.time = MIN(now + runinf->budget - runinf->cputime, + PERIOD_BEGIN(waitinf)) - now; + } + else + { + ret.time = runinf->budget - runinf->cputime; + } + } + else + { + waitinf = list_entry(waitq->next, struct cbs_vcpu_info, list); + + ret.task = IDLETASK(cpu); + ret.time = PERIOD_BEGIN(waitinf) - now; + } + + /* + * TODO: Do something USEFUL when this happens and find out, why it + * still can happen!!! + */ + if ( ret.time < 0) + printk("Ouch! We are seriously BEHIND schedule! 
%"PRIi64"\n", + ret.time); + + ret.migrated = 0; + + CBS_VCPU(ret.task)->sched_start_abs = now; + CHECK(ret.time > 0); + ASSERT(cbs_runnable(ret.task)); + CBS_PCPU(cpu)->current_budget_expires = now + ret.time; + return ret; +} + +static void cbs_sleep(const struct scheduler *ops, struct vcpu *v) +{ + if ( is_idle_vcpu(v) ) + return; + + CBS_VCPU(v)->status |= CBS_ASLEEP; + + if ( per_cpu(schedule_data, v->processor).curr == v ) + { + cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ); + } + else + { + if ( __task_on_queue(v) ) + __del_from_queue(v); + } +} + +/* + * Compares two vcpus in the relation of whether the one is allowed to + * interrupt the other's execution. + * It returns true (!=0) if a switch to the other vcpu is good. + * Priority scheme is as follows: + * EDF: early deadline > late deadline + */ +static inline int should_switch(struct vcpu *cur, + struct vcpu *other, + s_time_t now) +{ + struct cbs_vcpu_info *cur_inf, *other_inf; + cur_inf = CBS_VCPU(cur); + other_inf = CBS_VCPU(other); + + /* Always interrupt idle vcpu. */ + if ( is_idle_vcpu(cur) ) + return 1; + + /* Check whether we need to make an earlier scheduling decision */ + if ( PERIOD_BEGIN(other_inf) < + CBS_PCPU(other->processor)->current_budget_expires ) + return 1; + + return 0; +} + +/* + * This function wakes up a vcpu, i.e. moves them into the appropriate queue + * + * For Hard Real-Time vcpus (soft = 0): + * -When a blocked vcpu unblocks, it is allowed to start execution at + * the beginning of the next complete period + * (D..deadline, R..running, B..blocking/sleeping, U..unblocking/waking up + * + * DRRB_____D__U_____DRRRRR___D________ ... + * + * -This causes the vcpu to miss a period (and a deadline) + * -Doesn't disturb the schedule at all + * -Deadlines keep occurring isochronous + * + * For Soft Real-Time vcpus (soft = 1): + * -Deadlines are set and updated according to the Constant Bandwidth Server + * rule and vcpus are moved immediately to the run queue.
+ * + */ +static void cbs_wake(const struct scheduler *ops, struct vcpu *v) +{ + s_time_t now = NOW(); + struct cbs_vcpu_info* inf = CBS_VCPU(v); + + if ( unlikely(is_idle_vcpu(v)) ) + return; + + if ( unlikely(__task_on_queue(v)) ) + return; + + ASSERT(!cbs_runnable(v)); + inf->status &= ~CBS_ASLEEP; + + if ( unlikely(inf->deadl_abs == 0) ) + { + /* Initial setup of the deadline */ + inf->deadl_abs = now + inf->budget; + } + +#ifdef CBS_STATS + inf->block_tot++; +#endif + + if ( cbs_soft(v) ) + { + /* Apply CBS rule + * Where: + * c == Remaining server budget == (inf->budget - inf->cputime) + * d == Server (vcpu) deadline == inf->deadl_abs + * r == Wake-up time of vcpu == now + * U == Server (vcpu) bandwidth == (inf->budget / inf->period) + * + * if c>=(d-r)*U ---> + * (inf->budget - inf->cputime) >= (inf->deadl_abs - now) * (inf->budget / inf->period) + * + * If true, push deadline back by one period and refresh budget, else + * use current budget and deadline. + */ + if((inf->budget - inf->cputime) >= + ((inf->deadl_abs - now) * (inf->budget / inf->period))) + { + /* Push back deadline by one period */ + inf->deadl_abs += inf->period; + inf->cputime = 0; + } + + /* In CBS we don't care if the period has begun, + * the task doesn't have to wait for its period + * because it'll never request more than its budget + * for any given period. + */ + __add_to_runqueue_sort(v); + } + else { + /* Task is a hard task, treat accordingly */ +#ifdef CBS_STATS + if ( now < inf->deadl_abs ) + { + /* Short blocking */ + inf->short_block_tot++; + } + else + { + /* Long unblocking, someone is going to miss their deadline. */ + inf->long_block_tot++; + } +#endif + + if ( PERIOD_BEGIN(inf) > now ) + __add_to_waitqueue_sort(v); + else + __add_to_runqueue_sort(v); + } + +#ifdef CBS_STATS + /* Do some statistics here...
*/ + if ( inf->block_abs != 0 ) + { + inf->block_time_tot += now - inf->block_abs; + } +#endif + + ASSERT(__task_on_queue(v)); + /* + * Check whether the awakened task needs to invoke the do_schedule + * routine. Try to avoid unnecessary runs but: + * Safe approximation: Always switch to scheduler! + */ + ASSERT(v->processor >= 0); + ASSERT(v->processor < nr_cpu_ids); + ASSERT(per_cpu(schedule_data, v->processor).curr); + + if ( should_switch(per_cpu(schedule_data, v->processor).curr, v, now) ) + cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ); +} + +/* Print a lot of useful information about a vcpus in the system */ +static void cbs_dump_vcpu(struct vcpu *v) +{ + printk("%i.%i has=%c ", v->domain->domain_id, v->vcpu_id, + v->is_running ? 'T':'F'); + printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64, + CBS_VCPU(v)->period, CBS_VCPU(v)->budget, CBS_VCPU(v)->deadl_abs); + +#ifdef CBS_STATS + printk(" m=%u mt=%"PRIu64"o=%u ot=%"PRIu64, + CBS_VCPU(v)->miss_tot, CBS_VCPU(v)->miss_time, + CBS_VCPU(v)->over_tot, CBS_VCPU(v)->over_time); + + if ( CBS_VCPU(v)->block_tot != 0 ) + printk("\n blks=%u sh=%u (%u%%) "\ + "l=%u (%u%%) avg: b=%"PRIu64, + CBS_VCPU(v)->block_tot, CBS_VCPU(v)->short_block_tot, + (CBS_VCPU(v)->short_block_tot * 100) / CBS_VCPU(v)->block_tot, + CBS_VCPU(v)->long_block_tot, + (CBS_VCPU(v)->long_block_tot * 100) / CBS_VCPU(v)->block_tot, + (CBS_VCPU(v)->block_time_tot) / CBS_VCPU(v)->block_tot); +#endif + printk("\n"); +} + + +/* Dumps all vcpus on the specified cpu */ +static void cbs_dump_cpu_state(const struct scheduler *ops, int cpu) +{ + struct list_head *list, *queue, *tmp; + struct cbs_vcpu_info *v_inf; + struct domain *d; + struct vcpu *v; + int loop = 0; + + printk("now=%"PRIu64"\n", NOW()); + queue = RUNQ(cpu); + printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue, + (unsigned long) queue->next, (unsigned long) queue->prev); + list_for_each_safe ( list, tmp, queue ) + { + printk("%3d: ", loop++); + v_inf = list_entry(list, struct 
cbs_vcpu_info, list); + cbs_dump_vcpu(v_inf->vcpu); + } + + queue = WAITQ(cpu); + loop = 0; + printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue, + (unsigned long) queue->next, (unsigned long) queue->prev); + list_for_each_safe ( list, tmp, queue ) + { + printk("%3d: ", loop++); + v_inf = list_entry(list, struct cbs_vcpu_info, list); + cbs_dump_vcpu(v_inf->vcpu); + } + + loop = 0; + printk("\nnot on Q\n"); + + rcu_read_lock(&domlist_read_lock); + for_each_domain ( d ) + { + if ( (d->cpupool ? d->cpupool->sched : &sched_cbs_def) != ops ) + continue; + for_each_vcpu(d, v) + { + if ( !__task_on_queue(v) && (v->processor == cpu) ) + { + printk("%3d: ", loop++); + cbs_dump_vcpu(v); + } + } + } + rcu_read_unlock(&domlist_read_lock); +} + + +/* Set or fetch domain scheduling parameters */ +static int cbs_adjust(const struct scheduler *ops, struct domain *d, struct xen_domctl_scheduler_op *op) +{ + struct cbs_priv_info *prv = CBS_PRIV(ops); + unsigned long flags; + s_time_t now = NOW(); + struct vcpu *v; + int rc = 0; + + /* + * Serialize against the pluggable scheduler lock to protect from + * concurrent updates. We need to take the runq lock for the VCPUs + * as well, since we are touching budget and period. + * + * As in sched_credit2.c, runq locks nest inside the pluggable scheduler + * lock. 
+ */ + spin_lock_irqsave(&prv->lock, flags); + + if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo ) + { + /* Check for sane parameters */ + if ( !op->u.cbs.period ) + { + printk("Period Not set"); + rc = -EINVAL; + goto out; + } + + /* + * Sanity checking + */ + if ( (op->u.cbs.period > PERIOD_MAX) || + (op->u.cbs.period < PERIOD_MIN) || + (op->u.cbs.budget > op->u.cbs.period) || + (op->u.cbs.budget < BUDGET_MIN) ) + { + printk("Insane Parameters: period: %lu\tbudget: %lu\n", op->u.cbs.period, op->u.cbs.budget); + rc = -EINVAL; + goto out; + } + + /* Time-driven domains */ + for_each_vcpu ( d, v ) + { + spinlock_t *lock = vcpu_schedule_lock(v); + + CBS_VCPU(v)->period = op->u.cbs.period; + CBS_VCPU(v)->budget = op->u.cbs.budget; + if(op->u.cbs.soft) + { + CBS_VCPU(v)->status |= CBS_SOFT_TASK; + } + else + { + /* Correct deadline when switching from a soft to hard vcpu */ + if( unlikely((CBS_VCPU(v)->deadl_abs - now) >= (CBS_VCPU(v)->period * 3)) ) + { + CBS_VCPU(v)->deadl_abs = (now - CBS_VCPU(v)->cputime) + (2 * CBS_VCPU(v)->period); + } + + CBS_VCPU(v)->status &= (~CBS_SOFT_TASK); + } + vcpu_schedule_unlock(lock, v); + } + } + else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo ) + { + if ( d->vcpu[0] == NULL ) + { + rc = -EINVAL; + goto out; + } + + op->u.cbs.period = CBS_VCPU(d->vcpu[0])->period; + op->u.cbs.budget = CBS_VCPU(d->vcpu[0])->budget; + op->u.cbs.soft = cbs_soft(d->vcpu[0]); + } + +out: + spin_unlock_irqrestore(&prv->lock, flags); + + return rc; +} + +static struct cbs_priv_info _cbs_priv; + +const struct scheduler sched_cbs_def = { + .name = "Constant Bandwidth Server Scheduler", + .opt_name = "cbs", + .sched_id = XEN_SCHEDULER_CBS, + .sched_data = &_cbs_priv, + + .init_domain = cbs_init_domain, + .destroy_domain = cbs_destroy_domain, + + .insert_vcpu = cbs_insert_vcpu, + + .alloc_vdata = cbs_alloc_vdata, + .free_vdata = cbs_free_vdata, + .alloc_pdata = cbs_alloc_pdata, + .free_pdata = cbs_free_pdata, + .alloc_domdata = cbs_alloc_domdata, + .free_domdata = 
cbs_free_domdata, + + .init = cbs_init, + .deinit = cbs_deinit, + + .do_schedule = cbs_do_schedule, + .pick_cpu = cbs_pick_cpu, + .dump_cpu_state = cbs_dump_cpu_state, + .sleep = cbs_sleep, + .wake = cbs_wake, + .adjust = cbs_adjust, +}; + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c deleted file mode 100644 index 5df4825..0000000 --- a/xen/common/sched_sedf.c +++ /dev/null @@ -1,917 +0,0 @@ -/****************************************************************************** - * Constant Bandwidth Server Scheduler for Xen - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * by DornerWorks Ltd. 
(C) 2014 Grand Rapids, MI - * - * Adapted from code by Stephan Diestelhorst (C) 2004 Cambridge University - * and Mark Williamson (C) 2004 Intel Research Cambridge - * - */ - -#include <xen/lib.h> -#include <xen/sched.h> -#include <xen/sched-if.h> -#include <xen/timer.h> -#include <xen/softirq.h> -#include <xen/time.h> -#include <xen/errno.h> - -#ifndef NDEBUG -#define CBS_STATS -#define CHECK(_p) \ - do { \ - if ( !(_p) ) \ - printk("Check '%s' failed, line %d, file %s\n", \ - #_p , __LINE__, __FILE__); \ - } while ( 0 ) -#else -#define CHECK(_p) ((void)0) -#endif - -#define CBS_SOFT_TASK (1) -#define CBS_ASLEEP (16) - -#define DEFAULT_PERIOD (MILLISECS(20)) -#define DEFAULT_BUDGET (MILLISECS(10)) - -#define PERIOD_MAX MILLISECS(10000) /* 10s */ -#define PERIOD_MIN (MICROSECS(10)) /* 10us */ -#define BUDGET_MIN (MICROSECS(5)) /* 5us */ - -#define EQ(_A, _B) ((!!(_A)) == (!!(_B))) - - -struct cbs_dom_info { - struct domain *domain; -}; - -struct cbs_priv_info { - /* lock for the whole pluggable scheduler, nests inside cpupool_lock */ - spinlock_t lock; -}; - -struct cbs_vcpu_info { - struct vcpu *vcpu; - struct list_head list; - - - /* Parameters for EDF-CBS */ - s_time_t period; /* = Server scheduling period */ - s_time_t budget; /* = Guarenteed minimum CPU time per period */ - /* Note: Server bandwidth = (budget / period) */ - - /* Status of vcpu */ - int status; - /* Bookkeeping */ - s_time_t deadl_abs; - s_time_t sched_start_abs; - s_time_t cputime; - /* Times the vcpu un-/blocked */ - s_time_t block_abs; - s_time_t unblock_abs; - -#ifdef CBS_STATS - s_time_t block_time_tot; - int block_tot; - int short_block_tot; - int long_block_tot; - s_time_t miss_time; - s_time_t over_time; - int miss_tot; - int over_tot; -#endif -}; - -struct cbs_cpu_info { - struct list_head runnableq; - struct list_head waitq; - s_time_t current_budget_expires; -}; - -#define CBS_PRIV(_ops) \ - ((struct cbs_priv_info *)((_ops)->sched_data)) -#define CBS_VCPU(_vcpu) ((struct 
cbs_vcpu_info *)((_vcpu)->sched_priv)) -#define CBS_PCPU(_cpu) \ - ((struct cbs_cpu_info *)per_cpu(schedule_data, _cpu).sched_priv) -#define LIST(_vcpu) (&CBS_VCPU(_vcpu)->list) -#define RUNQ(_cpu) (&CBS_PCPU(_cpu)->runnableq) -#define WAITQ(_cpu) (&CBS_PCPU(_cpu)->waitq) -#define IDLETASK(_cpu) (idle_vcpu[_cpu]) - -#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period) - -#define DIV_UP(_X, _Y) (((_X) + (_Y) - 1) / _Y) - -#define cbs_runnable(edom) (!(CBS_VCPU(edom)->status & CBS_ASLEEP)) - -#define cbs_soft(edom) (CBS_VCPU(edom)->status & CBS_SOFT_TASK) - -static void cbs_dump_cpu_state(const struct scheduler *ops, int cpu); - -static inline int __task_on_queue(struct vcpu *v) -{ - return (((LIST(v))->next != NULL) && (LIST(v)->next != LIST(v))); -} - -static inline void __del_from_queue(struct vcpu *v) -{ - struct list_head *list = LIST(v); - ASSERT(__task_on_queue(v)); - list_del(list); - list->next = NULL; - ASSERT(!__task_on_queue(v)); -} - -typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2); - -static inline void list_insert_sort( - struct list_head *list, struct list_head *element, list_comparer comp) -{ - struct list_head *cur; - - /* Iterate through all elements to find our "hole" */ - list_for_each( cur, list ) - if ( comp(element, cur) < 0 ) - break; - - /* cur now contains the element, before which we'll enqueue */ - list_add(element, cur->prev); -} - -#define VCPU_COMPARER(name, field, comp1, comp2) \ -static int name##_comp(struct list_head* el1, struct list_head* el2) \ -{ \ - struct cbs_vcpu_info *v1, *v2; \ - v1 = list_entry(el1, struct cbs_vcpu_info, field); \ - v2 = list_entry(el2, struct cbs_vcpu_info, field); \ - if ( (comp1) == (comp2) ) \ - return 0; \ - if ( (comp1) < (comp2) ) \ - return -1; \ - else \ - return 1; \ -} - -/* - * Adds a vcpu to the queue of processes which wait for the beginning of the - * next period; this list is therefore sortet by this time, which is simply - * absol. deadline - period. 
- */ -VCPU_COMPARER(waitq, list, PERIOD_BEGIN(v1), PERIOD_BEGIN(v2)); -static inline void __add_to_waitqueue_sort(struct vcpu *v) -{ - ASSERT(!__task_on_queue(v)); - list_insert_sort(WAITQ(v->processor), LIST(v), waitq_comp); - ASSERT(__task_on_queue(v)); -} - -/* - * Adds a vcpu to the queue of processes which have started their current - * period and are runnable (i.e. not blocked, dieing,...). The first element - * on this list is running on the processor, if the list is empty the idle - * task will run. As we are implementing EDF, this list is sorted by deadlines. - */ -VCPU_COMPARER(runq, list, v1->deadl_abs, v2->deadl_abs); -static inline void __add_to_runqueue_sort(struct vcpu *v) -{ - list_insert_sort(RUNQ(v->processor), LIST(v), runq_comp); -} - - -static void cbs_insert_vcpu(const struct scheduler *ops, struct vcpu *v) -{ - if ( is_idle_vcpu(v) ) - { - CBS_VCPU(v)->deadl_abs = 0; - CBS_VCPU(v)->status &= ~CBS_ASLEEP; - } -} - -static void *cbs_alloc_vdata(const struct scheduler *ops, struct vcpu *v, void *dd) -{ - struct cbs_vcpu_info *inf; - - inf = xzalloc(struct cbs_vcpu_info); - if ( inf == NULL ) - return NULL; - - inf->vcpu = v; - - inf->deadl_abs = 0; - inf->cputime = 0; - inf->status = CBS_ASLEEP; - - if (v->domain->domain_id == 0) - { - /* Domain 0, needs a budget to boot the machine */ - inf->period = DEFAULT_PERIOD; - inf->budget = DEFAULT_BUDGET; - } - else - { - inf->period = DEFAULT_PERIOD; - inf->budget = 0; - } - - INIT_LIST_HEAD(&(inf->list)); - - SCHED_STAT_CRANK(vcpu_init); - - return inf; -} - -static void * -cbs_alloc_pdata(const struct scheduler *ops, int cpu) -{ - struct cbs_cpu_info *spc; - - spc = xzalloc(struct cbs_cpu_info); - BUG_ON(spc == NULL); - INIT_LIST_HEAD(&spc->waitq); - INIT_LIST_HEAD(&spc->runnableq); - - return (void *)spc; -} - -static void -cbs_free_pdata(const struct scheduler *ops, void *spc, int cpu) -{ - if ( spc == NULL ) - return; - - xfree(spc); -} - -static void cbs_free_vdata(const struct scheduler *ops, 
void *priv) -{ - xfree(priv); -} - -static void * -cbs_alloc_domdata(const struct scheduler *ops, struct domain *d) -{ - return xzalloc(struct cbs_dom_info); -} - -static int cbs_init_domain(const struct scheduler *ops, struct domain *d) -{ - d->sched_priv = cbs_alloc_domdata(ops, d); - if ( d->sched_priv == NULL ) - return -ENOMEM; - - return 0; -} - -static void cbs_free_domdata(const struct scheduler *ops, void *data) -{ - xfree(data); -} - -static void cbs_destroy_domain(const struct scheduler *ops, struct domain *d) -{ - cbs_free_domdata(ops, d->sched_priv); -} - -static int cbs_pick_cpu(const struct scheduler *ops, struct vcpu *v) -{ - cpumask_t online_affinity; - cpumask_t *online; - - online = cpupool_scheduler_cpumask(v->domain->cpupool); - cpumask_and(&online_affinity, v->cpu_affinity, online); - return cpumask_cycle(v->vcpu_id % cpumask_weight(&online_affinity) - 1, - &online_affinity); -} - -/* - * Handles the rescheduling & bookkeeping of vcpus running in their - * guaranteed time budget. 
- */ -static void desched_edf_vcpu(s_time_t now, struct vcpu *v) -{ - struct cbs_vcpu_info* inf = CBS_VCPU(v); - - /* Current vcpu is running in real time mode */ - ASSERT(__task_on_queue(v)); - - /* Update the vcpu's cputime */ - inf->cputime += now - inf->sched_start_abs; - - /* Scheduling decisions which don't remove the running vcpu from - * the runq */ - if ( (inf->cputime < inf->budget) && cbs_runnable(v) ) - return; - - __del_from_queue(v); - -#ifdef CBS_STATS - /* Manage deadline misses */ - if ( unlikely(inf->deadl_abs < now) ) - { - inf->miss_tot++; - inf->miss_time += inf->cputime; - } -#endif - - /* Manage overruns */ - if ( inf->cputime >= inf->budget ) - { - inf->cputime -= inf->budget; - - - /* Set next deadline */ - inf->deadl_abs += inf->period; - - /* Ensure that the cputime is always less than budget */ - if ( unlikely(inf->cputime > inf->budget) ) - { -#ifdef CBS_STATS - inf->over_tot++; - inf->over_time += inf->cputime; -#endif - - /* Make up for the overage by pushing the deadline - into the future */ - inf->deadl_abs += ((inf->cputime / inf->budget) - * inf->period) * 2; - inf->cputime -= (inf->cputime / inf->budget) * inf->budget; - } - - /* Ensure that the start of the next period is in the future */ - if ( unlikely(PERIOD_BEGIN(inf) < now) ) - inf->deadl_abs += - (DIV_UP(now - PERIOD_BEGIN(inf), - inf->period)) * inf->period; - } - - /* Add a runnable vcpu to the appropriate queue */ - if ( cbs_runnable(v) ) - { - if( cbs_soft(v) ) - { - __add_to_runqueue_sort(v); - } - else - { - __add_to_waitqueue_sort(v); - } - } - - ASSERT(EQ(cbs_runnable(v), __task_on_queue(v))); -} - - -/* Update all elements on the queues */ -static void update_queues( - s_time_t now, struct list_head *runq, struct list_head *waitq) -{ - struct list_head *cur, *tmp; - struct cbs_vcpu_info *curinf; - - /* - * Check for the first elements of the waitqueue, whether their - * next period has already started. 
- */ - list_for_each_safe ( cur, tmp, waitq ) - { - curinf = list_entry(cur, struct cbs_vcpu_info, list); - if ( PERIOD_BEGIN(curinf) > now ) - break; - __del_from_queue(curinf->vcpu); - __add_to_runqueue_sort(curinf->vcpu); - } - - /* Process the runq, find vcpus that are on the runq that shouldn't */ - list_for_each_safe ( cur, tmp, runq ) - { - curinf = list_entry(cur, struct cbs_vcpu_info, list); - - if ( unlikely(curinf->budget == 0) ) - { - /* Ignore vcpus with empty budget */ - __del_from_queue(curinf->vcpu); - - /* Move them to their next period */ - curinf->deadl_abs += curinf->period; - - /* Ensure that the start of the next period is in the future */ - if ( unlikely(PERIOD_BEGIN(curinf) < now) ) - curinf->deadl_abs += - (DIV_UP(now - PERIOD_BEGIN(curinf), - curinf->period)) * curinf->period; - - /* Put them back into the queue */ - __add_to_waitqueue_sort(curinf->vcpu); - } - - else - break; - } -} - - -static int cbs_init(struct scheduler *ops) -{ - struct cbs_priv_info *prv; - - prv = xzalloc(struct cbs_priv_info); - if ( prv == NULL ) - return -ENOMEM; - - ops->sched_data = prv; - spin_lock_init(&prv->lock); - - return 0; -} - - -static void cbs_deinit(const struct scheduler *ops) -{ - struct cbs_priv_info *prv; - - prv = CBS_PRIV(ops); - if ( prv != NULL ) - xfree(prv); -} - - -/* - * Main scheduling function - * Reasons for calling this function are: - * -budget for the current server is used up - * -vcpu on waitqueue has started it's period - * -and various others ;) in general: determine which vcpu to run next - */ -static struct task_slice cbs_do_schedule( - const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled) -{ - int cpu = smp_processor_id(); - struct list_head *runq = RUNQ(cpu); - struct list_head *waitq = WAITQ(cpu); - struct cbs_vcpu_info *inf = CBS_VCPU(current); - struct cbs_vcpu_info *runinf, *waitinf; - struct task_slice ret; - - SCHED_STAT_CRANK(schedule); - - /* Idle tasks don't need any of the following stuff */ - 
if ( is_idle_vcpu(current) ) - goto check_waitq; - - /* - * Create local state of the status of the vcpu, in order to avoid - * inconsistent state during scheduling decisions, because data for - * vcpu_runnable is not protected by the scheduling lock! - */ - if ( !vcpu_runnable(current) ) - inf->status |= CBS_ASLEEP; - - if ( inf->status & CBS_ASLEEP ) - inf->block_abs = now; - - desched_edf_vcpu(now, current); - check_waitq: - update_queues(now, runq, waitq); - - /* - * Now simply pick the first vcpu from the runqueue, which has the - * earliest deadline, because the list is sorted - * - * Tasklet work (which runs in idle VCPU context) overrides all else. - */ - if ( tasklet_work_scheduled || - (list_empty(runq) && list_empty(waitq)) || - unlikely(!cpumask_test_cpu(cpu, - cpupool_scheduler_cpumask(per_cpu(cpupool, cpu)))) ) - { - ret.task = IDLETASK(cpu); - ret.time = SECONDS(1); - } - else if ( !list_empty(runq) ) - { - runinf = list_entry(runq->next, struct cbs_vcpu_info, list); - ret.task = runinf->vcpu; - if ( !list_empty(waitq) ) - { - waitinf = list_entry(waitq->next, - struct cbs_vcpu_info, list); - /* - * Rerun scheduler, when scheduled vcpu consumes - * its budget or the first vcpu from the waitqueue - * gets ready. - */ - ret.time = MIN(now + runinf->budget - runinf->cputime, - PERIOD_BEGIN(waitinf)) - now; - } - else - { - ret.time = runinf->budget - runinf->cputime; - } - } - else - { - waitinf = list_entry(waitq->next, struct cbs_vcpu_info, list); - - ret.task = IDLETASK(cpu); - ret.time = PERIOD_BEGIN(waitinf) - now; - } - - /* - * TODO: Do something USEFUL when this happens and find out, why it - * still can happen!!! - */ - if ( ret.time < 0) - printk("Ouch! We are seriously BEHIND schedule! 
%"PRIi64"\n", - ret.time); - - ret.migrated = 0; - - CBS_VCPU(ret.task)->sched_start_abs = now; - CHECK(ret.time > 0); - ASSERT(cbs_runnable(ret.task)); - CBS_PCPU(cpu)->current_budget_expires = now + ret.time; - return ret; -} - -static void cbs_sleep(const struct scheduler *ops, struct vcpu *v) -{ - if ( is_idle_vcpu(v) ) - return; - - CBS_VCPU(v)->status |= CBS_ASLEEP; - - if ( per_cpu(schedule_data, v->processor).curr == v ) - { - cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ); - } - else - { - if ( __task_on_queue(v) ) - __del_from_queue(v); - } -} - -/* - * Compares two vcpus in the relation of whether the one is allowed to - * interrupt the others execution. - * It returns true (!=0) if a switch to the other vcpu is good. - * Priority scheme is as follows: - * EDF: early deadline > late deadline - */ -static inline int should_switch(struct vcpu *cur, - struct vcpu *other, - s_time_t now) -{ - struct cbs_vcpu_info *cur_inf, *other_inf; - cur_inf = CBS_VCPU(cur); - other_inf = CBS_VCPU(other); - - /* Always interrupt idle vcpu. */ - if ( is_idle_vcpu(cur) ) - return 1; - - /* Check whether we need to make an earlier scheduling decision */ - if ( PERIOD_BEGIN(other_inf) < - CBS_PCPU(other->processor)->current_budget_expires ) - return 1; - - return 0; -} - -/* - * This function wakes up a vcpu, i.e. moves them into the appropriate queue - * - * For Hard Real-Time vcpus (soft = 0): - * -When a blocked vcpu unblocks, it is allowed to start execution at - * the beginning of the next complete period - * (D..deadline, R..running, B..blocking/sleeping, U..unblocking/waking up - * - * DRRB_____D__U_____DRRRRR___D________ ... - * - * -This causes the vcpu to miss a period (and a deadlline) - * -Doesn't disturb the schedule at all - * -Deadlines keep occuring isochronous - * - * For Soft Real-Time vcpus (soft = 1): - * -Deadlines are set and updated according to the Constant Bandwidth Server - * rule and vcpus are moved immediately to the run queue. 
- * - */ -static void cbs_wake(const struct scheduler *ops, struct vcpu *v) -{ - s_time_t now = NOW(); - struct cbs_vcpu_info* inf = CBS_VCPU(v); - - if ( unlikely(is_idle_vcpu(v)) ) - return; - - if ( unlikely(__task_on_queue(v)) ) - return; - - ASSERT(!cbs_runnable(v)); - inf->status &= ~CBS_ASLEEP; - - if ( unlikely(inf->deadl_abs == 0) ) - { - /* Initial setup of the deadline */ - inf->deadl_abs = now + inf->budget; - } - -#ifdef CBS_STATS - inf->block_tot++; -#endif - - if ( cbs_soft(v) ) - { - /* Apply CBS rule - * Where: - * c == Remaining server budget == (inf->budget - cpu_time) - * d == Server (vcpu) deadline == inf->deadl_abs - * r == Wake-up time of vcpu == now - * U == Server (vcpu) bandwidth == (inf->budget / inf->period) - * - * if c>=(d-r)*U ---> - * (inf->budget - cputime) >= (inf->deadl_abs - now) * inf->period - * - * If true, push deadline back by one period and refresh budget, else - * use current budget and deadline. - */ - if((inf->budget - inf->cputime) >= - ((inf->deadl_abs - now) * (inf->budget / inf->period))) - { - /* Push back deadline by one period */ - inf->deadl_abs += inf->period; - inf->cputime = 0; - } - - /* In CBS we don't care if the period has begun, - * the task doesn't have to wait for its period - * because it'll never request more than its budget - * for any given period. - */ - __add_to_runqueue_sort(v); - } - else { - /* Task is a hard task, treat accordingly */ -#ifdef CBS_STATS - if ( now < inf->deadl_abs ) - { - /* Short blocking */ - inf->short_block_tot++; - } - else - { - /* Long unblocking, someone is going to miss their deadline. */ - inf->long_block_tot++; - } -#endif - - if ( PERIOD_BEGIN(inf) > now ) - __add_to_waitqueue_sort(v); - else - __add_to_runqueue_sort(v); - } - -#ifdef CBS_STATS - /* Do some statistics here... 
*/ - if ( inf->block_abs != 0 ) - { - inf->block_time_tot += now - inf->block_abs; - } -#endif - - ASSERT(__task_on_queue(v)); - /* - * Check whether the awakened task needs to invoke the do_schedule - * routine. Try to avoid unnecessary runs but: - * Safe approximation: Always switch to scheduler! - */ - ASSERT(v->processor >= 0); - ASSERT(v->processor < nr_cpu_ids); - ASSERT(per_cpu(schedule_data, v->processor).curr); - - if ( should_switch(per_cpu(schedule_data, v->processor).curr, v, now) ) - cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ); -} - -/* Print a lot of useful information about a vcpus in the system */ -static void cbs_dump_vcpu(struct vcpu *v) -{ - printk("%i.%i has=%c ", v->domain->domain_id, v->vcpu_id, - v->is_running ? 'T':'F'); - printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64, - CBS_VCPU(v)->period, CBS_VCPU(v)->budget, CBS_VCPU(v)->deadl_abs); - -#ifdef CBS_STATS - printk(" m=%u mt=%"PRIu64"o=%u ot=%"PRIu64, - CBS_VCPU(v)->miss_tot, CBS_VCPU(v)->miss_time, - CBS_VCPU(v)->over_tot, CBS_VCPU(v)->over_time); - - if ( CBS_VCPU(v)->block_tot != 0 ) - printk("\n blks=%u sh=%u (%u%%) "\ - "l=%u (%u%%) avg: b=%"PRIu64, - CBS_VCPU(v)->block_tot, CBS_VCPU(v)->short_block_tot, - (CBS_VCPU(v)->short_block_tot * 100) / CBS_VCPU(v)->block_tot, - CBS_VCPU(v)->long_block_tot, - (CBS_VCPU(v)->long_block_tot * 100) / CBS_VCPU(v)->block_tot, - (CBS_VCPU(v)->block_time_tot) / CBS_VCPU(v)->block_tot); -#endif - printk("\n"); -} - - -/* Dumps all vcpus on the specified cpu */ -static void cbs_dump_cpu_state(const struct scheduler *ops, int cpu) -{ - struct list_head *list, *queue, *tmp; - struct cbs_vcpu_info *v_inf; - struct domain *d; - struct vcpu *v; - int loop = 0; - - printk("now=%"PRIu64"\n", NOW()); - queue = RUNQ(cpu); - printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue, - (unsigned long) queue->next, (unsigned long) queue->prev); - list_for_each_safe ( list, tmp, queue ) - { - printk("%3d: ", loop++); - v_inf = list_entry(list, struct 
cbs_vcpu_info, list); - cbs_dump_vcpu(v_inf->vcpu); - } - - queue = WAITQ(cpu); - loop = 0; - printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue, - (unsigned long) queue->next, (unsigned long) queue->prev); - list_for_each_safe ( list, tmp, queue ) - { - printk("%3d: ", loop++); - v_inf = list_entry(list, struct cbs_vcpu_info, list); - cbs_dump_vcpu(v_inf->vcpu); - } - - loop = 0; - printk("\nnot on Q\n"); - - rcu_read_lock(&domlist_read_lock); - for_each_domain ( d ) - { - if ( (d->cpupool ? d->cpupool->sched : &sched_cbs_def) != ops ) - continue; - for_each_vcpu(d, v) - { - if ( !__task_on_queue(v) && (v->processor == cpu) ) - { - printk("%3d: ", loop++); - cbs_dump_vcpu(v); - } - } - } - rcu_read_unlock(&domlist_read_lock); -} - - -/* Set or fetch domain scheduling parameters */ -static int cbs_adjust(const struct scheduler *ops, struct domain *d, struct xen_domctl_scheduler_op *op) -{ - struct cbs_priv_info *prv = CBS_PRIV(ops); - unsigned long flags; - s_time_t now = NOW(); - struct vcpu *v; - int rc = 0; - - /* - * Serialize against the pluggable scheduler lock to protect from - * concurrent updates. We need to take the runq lock for the VCPUs - * as well, since we are touching budget and period. - * - * As in sched_credit2.c, runq locks nest inside the pluggable scheduler - * lock. 
- */ - spin_lock_irqsave(&prv->lock, flags); - - if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo ) - { - /* Check for sane parameters */ - if ( !op->u.cbs.period ) - { - printk("Period Not set"); - rc = -EINVAL; - goto out; - } - - /* - * Sanity checking - */ - if ( (op->u.cbs.period > PERIOD_MAX) || - (op->u.cbs.period < PERIOD_MIN) || - (op->u.cbs.budget > op->u.cbs.period) || - (op->u.cbs.budget < BUDGET_MIN) ) - { - printk("Insane Parameters: period: %lu\tbudget: %lu\n", op->u.cbs.period, op->u.cbs.budget); - rc = -EINVAL; - goto out; - } - - /* Time-driven domains */ - for_each_vcpu ( d, v ) - { - spinlock_t *lock = vcpu_schedule_lock(v); - - CBS_VCPU(v)->period = op->u.cbs.period; - CBS_VCPU(v)->budget = op->u.cbs.budget; - if(op->u.cbs.soft) - { - CBS_VCPU(v)->status |= CBS_SOFT_TASK; - } - else - { - /* Correct deadline when switching from a soft to hard vcpu */ - if( unlikely((CBS_VCPU(v)->deadl_abs - now) >= (CBS_VCPU(v)->period * 3)) ) - { - CBS_VCPU(v)->deadl_abs = (now - CBS_VCPU(v)->cputime) + (2 * CBS_VCPU(v)->period); - } - - CBS_VCPU(v)->status &= (~CBS_SOFT_TASK); - } - vcpu_schedule_unlock(lock, v); - } - } - else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo ) - { - if ( d->vcpu[0] == NULL ) - { - rc = -EINVAL; - goto out; - } - - op->u.cbs.period = CBS_VCPU(d->vcpu[0])->period; - op->u.cbs.budget = CBS_VCPU(d->vcpu[0])->budget; - op->u.cbs.soft = cbs_soft(d->vcpu[0]); - } - -out: - spin_unlock_irqrestore(&prv->lock, flags); - - return rc; -} - -static struct cbs_priv_info _cbs_priv; - -const struct scheduler sched_cbs_def = { - .name = "Constant Bandwidth Server Scheduler", - .opt_name = "cbs", - .sched_id = XEN_SCHEDULER_CBS, - .sched_data = &_cbs_priv, - - .init_domain = cbs_init_domain, - .destroy_domain = cbs_destroy_domain, - - .insert_vcpu = cbs_insert_vcpu, - - .alloc_vdata = cbs_alloc_vdata, - .free_vdata = cbs_free_vdata, - .alloc_pdata = cbs_alloc_pdata, - .free_pdata = cbs_free_pdata, - .alloc_domdata = cbs_alloc_domdata, - .free_domdata = 
cbs_free_domdata, - - .init = cbs_init, - .deinit = cbs_deinit, - - .do_schedule = cbs_do_schedule, - .pick_cpu = cbs_pick_cpu, - .dump_cpu_state = cbs_dump_cpu_state, - .sleep = cbs_sleep, - .wake = cbs_wake, - .adjust = cbs_adjust, -}; - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ -- 1.7.9.5 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.