[Xen-changelog] [xen staging] xen/sched: make sched-if.h really scheduler private
commit cbe977f5e0b42931fd76169595c4ab208e0d79af Author: Juergen Gross <jgross@xxxxxxxx> AuthorDate: Thu Nov 7 15:34:37 2019 +0100 Commit: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> CommitDate: Wed Jan 22 17:37:11 2020 +0000 xen/sched: make sched-if.h really scheduler private include/xen/sched-if.h should be private to scheduler code, so move it to common/sched/private.h and move the remaining use cases to cpupool.c and core.c. Signed-off-by: Juergen Gross <jgross@xxxxxxxx> Reviewed-by: Dario Faggioli <dfaggioli@xxxxxxxx> --- xen/arch/x86/dom0_build.c | 5 +- xen/common/domain.c | 70 ----- xen/common/domctl.c | 135 +--------- xen/common/sched/arinc653.c | 3 +- xen/common/sched/core.c | 191 +++++++++++++- xen/common/sched/cpupool.c | 13 +- xen/common/sched/credit.c | 2 +- xen/common/sched/credit2.c | 3 +- xen/common/sched/null.c | 3 +- xen/common/sched/private.h | 622 +++++++++++++++++++++++++++++++++++++++++++ xen/common/sched/rt.c | 3 +- xen/include/xen/domain.h | 3 + xen/include/xen/sched-if.h | 625 -------------------------------------------- xen/include/xen/sched.h | 7 + 14 files changed, 850 insertions(+), 835 deletions(-) diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c index 28b964e018..56c2dee0fc 100644 --- a/xen/arch/x86/dom0_build.c +++ b/xen/arch/x86/dom0_build.c @@ -9,7 +9,6 @@ #include <xen/libelf.h> #include <xen/pfn.h> #include <xen/sched.h> -#include <xen/sched-if.h> #include <xen/softirq.h> #include <asm/amd.h> @@ -227,9 +226,9 @@ unsigned int __init dom0_max_vcpus(void) dom0_nodes = node_online_map; for_each_node_mask ( node, dom0_nodes ) cpumask_or(&dom0_cpus, &dom0_cpus, &node_to_cpumask(node)); - cpumask_and(&dom0_cpus, &dom0_cpus, cpupool0->cpu_valid); + cpumask_and(&dom0_cpus, &dom0_cpus, cpupool_valid_cpus(cpupool0)); if ( cpumask_empty(&dom0_cpus) ) - cpumask_copy(&dom0_cpus, cpupool0->cpu_valid); + cpumask_copy(&dom0_cpus, cpupool_valid_cpus(cpupool0)); max_vcpus = cpumask_weight(&dom0_cpus); if ( opt_dom0_max_vcpus_min > max_vcpus ) diff --git a/xen/common/domain.c b/xen/common/domain.c index ee3f9ffd3e..dfea575b49 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -10,7 +10,6 @@ #include <xen/ctype.h> #include <xen/err.h> #include <xen/sched.h> -#include <xen/sched-if.h> #include <xen/domain.h> #include <xen/mm.h> #include <xen/event.h> @@ -577,75 +576,6 @@ void __init setup_system_domains(void) #endif } -void domain_update_node_affinity(struct domain *d) -{ - cpumask_var_t dom_cpumask, dom_cpumask_soft; - cpumask_t *dom_affinity; - const cpumask_t *online; - struct sched_unit *unit; - unsigned int cpu; - - /* Do we have vcpus already? If not, no need to update node-affinity. */ - if ( !d->vcpu || !d->vcpu[0] ) - return; - - if ( !zalloc_cpumask_var(&dom_cpumask) ) - return; - if ( !zalloc_cpumask_var(&dom_cpumask_soft) ) - { - free_cpumask_var(dom_cpumask); - return; - } - - online = cpupool_domain_master_cpumask(d); - - spin_lock(&d->node_affinity_lock); - - /* - * If d->auto_node_affinity is true, let's compute the domain's - * node-affinity and update d->node_affinity accordingly. if false, - * just leave d->auto_node_affinity alone. - */ - if ( d->auto_node_affinity ) - { - /* - * We want the narrowest possible set of pcpus (to get the narowest - * possible set of nodes). What we need is the cpumask of where the - * domain can run (the union of the hard affinity of all its vcpus), - * and the full mask of where it would prefer to run (the union of - * the soft affinity of all its various vcpus). Let's build them. 
- */ - for_each_sched_unit ( d, unit ) - { - cpumask_or(dom_cpumask, dom_cpumask, unit->cpu_hard_affinity); - cpumask_or(dom_cpumask_soft, dom_cpumask_soft, - unit->cpu_soft_affinity); - } - /* Filter out non-online cpus */ - cpumask_and(dom_cpumask, dom_cpumask, online); - ASSERT(!cpumask_empty(dom_cpumask)); - /* And compute the intersection between hard, online and soft */ - cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask); - - /* - * If not empty, the intersection of hard, soft and online is the - * narrowest set we want. If empty, we fall back to hard&online. - */ - dom_affinity = cpumask_empty(dom_cpumask_soft) ? - dom_cpumask : dom_cpumask_soft; - - nodes_clear(d->node_affinity); - for_each_cpu ( cpu, dom_affinity ) - node_set(cpu_to_node(cpu), d->node_affinity); - } - - spin_unlock(&d->node_affinity_lock); - - free_cpumask_var(dom_cpumask_soft); - free_cpumask_var(dom_cpumask); -} - - int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity) { /* Being disjoint with the system is just wrong. */ diff --git a/xen/common/domctl.c b/xen/common/domctl.c index 650310e874..8b819f56e5 100644 --- a/xen/common/domctl.c +++ b/xen/common/domctl.c @@ -11,7 +11,6 @@ #include <xen/err.h> #include <xen/mm.h> #include <xen/sched.h> -#include <xen/sched-if.h> #include <xen/domain.h> #include <xen/event.h> #include <xen/grant_table.h> @@ -65,9 +64,9 @@ static int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap, return err; } -static int xenctl_bitmap_to_bitmap(unsigned long *bitmap, - const struct xenctl_bitmap *xenctl_bitmap, - unsigned int nbits) +int xenctl_bitmap_to_bitmap(unsigned long *bitmap, + const struct xenctl_bitmap *xenctl_bitmap, + unsigned int nbits) { unsigned int guest_bytes, copy_bytes; int err = 0; @@ -200,7 +199,7 @@ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info) info->shared_info_frame = mfn_to_gmfn(d, virt_to_mfn(d->shared_info)); BUG_ON(SHARED_M2P(info->shared_info_frame)); - info->cpupool = d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE; + info->cpupool = cpupool_get_id(d); memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t)); @@ -234,16 +233,6 @@ void domctl_lock_release(void) spin_unlock(¤t->domain->hypercall_deadlock_mutex); } -static inline -int vcpuaffinity_params_invalid(const struct xen_domctl_vcpuaffinity *vcpuaff) -{ - return vcpuaff->flags == 0 || - ((vcpuaff->flags & XEN_VCPUAFFINITY_HARD) && - guest_handle_is_null(vcpuaff->cpumap_hard.bitmap)) || - ((vcpuaff->flags & XEN_VCPUAFFINITY_SOFT) && - guest_handle_is_null(vcpuaff->cpumap_soft.bitmap)); -} - void vnuma_destroy(struct vnuma_info *vnuma) { if ( vnuma ) @@ -608,122 +597,8 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) case XEN_DOMCTL_setvcpuaffinity: case XEN_DOMCTL_getvcpuaffinity: - { - struct vcpu *v; - const struct sched_unit *unit; - struct xen_domctl_vcpuaffinity *vcpuaff = &op->u.vcpuaffinity; - - ret = -EINVAL; - if ( vcpuaff->vcpu >= d->max_vcpus ) - break; - - ret = -ESRCH; - if ( (v = d->vcpu[vcpuaff->vcpu]) == NULL ) - break; - - unit = v->sched_unit; - ret = -EINVAL; - if ( vcpuaffinity_params_invalid(vcpuaff) ) - break; - - if ( op->cmd == XEN_DOMCTL_setvcpuaffinity ) - { - cpumask_var_t new_affinity, old_affinity; - cpumask_t *online = cpupool_domain_master_cpumask(v->domain); - - /* - * We want to be able to restore hard affinity if we are trying - * setting both and changing soft affinity (which happens later, - * when hard affinity has been succesfully chaged already) fails. 
- */ - if ( !alloc_cpumask_var(&old_affinity) ) - { - ret = -ENOMEM; - break; - } - cpumask_copy(old_affinity, unit->cpu_hard_affinity); - - if ( !alloc_cpumask_var(&new_affinity) ) - { - free_cpumask_var(old_affinity); - ret = -ENOMEM; - break; - } - - /* Undo a stuck SCHED_pin_override? */ - if ( vcpuaff->flags & XEN_VCPUAFFINITY_FORCE ) - vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE); - - ret = 0; - - /* - * We both set a new affinity and report back to the caller what - * the scheduler will be effectively using. - */ - if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD ) - { - ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity), - &vcpuaff->cpumap_hard, - nr_cpu_ids); - if ( !ret ) - ret = vcpu_set_hard_affinity(v, new_affinity); - if ( ret ) - goto setvcpuaffinity_out; - - /* - * For hard affinity, what we return is the intersection of - * cpupool's online mask and the new hard affinity. - */ - cpumask_and(new_affinity, online, unit->cpu_hard_affinity); - ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard, - new_affinity); - } - if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT ) - { - ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity), - &vcpuaff->cpumap_soft, - nr_cpu_ids); - if ( !ret) - ret = vcpu_set_soft_affinity(v, new_affinity); - if ( ret ) - { - /* - * Since we're returning error, the caller expects nothing - * happened, so we rollback the changes to hard affinity - * (if any). - */ - if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD ) - vcpu_set_hard_affinity(v, old_affinity); - goto setvcpuaffinity_out; - } - - /* - * For soft affinity, we return the intersection between the - * new soft affinity, the cpupool's online map and the (new) - * hard affinity. - */ - cpumask_and(new_affinity, new_affinity, online); - cpumask_and(new_affinity, new_affinity, - unit->cpu_hard_affinity); - ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft, - new_affinity); - } - - setvcpuaffinity_out: - free_cpumask_var(new_affinity); - free_cpumask_var(old_affinity); - } - else - { - if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD ) - ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard, - unit->cpu_hard_affinity); - if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT ) - ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft, - unit->cpu_soft_affinity); - } + ret = vcpu_affinity_domctl(d, op->cmd, &op->u.vcpuaffinity); break; - } case XEN_DOMCTL_scheduler_op: ret = sched_adjust(d, &op->u.scheduler_op); diff --git a/xen/common/sched/arinc653.c b/xen/common/sched/arinc653.c index 565575c326..8895d92b5e 100644 --- a/xen/common/sched/arinc653.c +++ b/xen/common/sched/arinc653.c @@ -26,7 +26,6 @@ #include <xen/lib.h> #include <xen/sched.h> -#include <xen/sched-if.h> #include <xen/timer.h> #include <xen/softirq.h> #include <xen/time.h> @@ -35,6 +34,8 @@ #include <xen/guest_access.h> #include <public/sysctl.h> +#include "private.h" + /************************************************************************** * Private Macros * **************************************************************************/ diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c index 4d8eb4c617..2fae959e90 100644 --- a/xen/common/sched/core.c +++ b/xen/common/sched/core.c @@ -23,7 +23,6 @@ #include <xen/time.h> #include <xen/timer.h> #include <xen/perfc.h> -#include <xen/sched-if.h> #include <xen/softirq.h> #include <xen/trace.h> #include <xen/mm.h> @@ -38,6 +37,8 @@ #include <xsm/xsm.h> #include <xen/err.h> +#include "private.h" + #ifdef CONFIG_XEN_GUEST #include <asm/guest.h> #else @@ -1607,6 +1608,194 @@ int 
vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason) return ret; } +static inline +int vcpuaffinity_params_invalid(const struct xen_domctl_vcpuaffinity *vcpuaff) +{ + return vcpuaff->flags == 0 || + ((vcpuaff->flags & XEN_VCPUAFFINITY_HARD) && + guest_handle_is_null(vcpuaff->cpumap_hard.bitmap)) || + ((vcpuaff->flags & XEN_VCPUAFFINITY_SOFT) && + guest_handle_is_null(vcpuaff->cpumap_soft.bitmap)); +} + +int vcpu_affinity_domctl(struct domain *d, uint32_t cmd, + struct xen_domctl_vcpuaffinity *vcpuaff) +{ + struct vcpu *v; + const struct sched_unit *unit; + int ret = 0; + + if ( vcpuaff->vcpu >= d->max_vcpus ) + return -EINVAL; + + if ( (v = d->vcpu[vcpuaff->vcpu]) == NULL ) + return -ESRCH; + + if ( vcpuaffinity_params_invalid(vcpuaff) ) + return -EINVAL; + + unit = v->sched_unit; + + if ( cmd == XEN_DOMCTL_setvcpuaffinity ) + { + cpumask_var_t new_affinity, old_affinity; + cpumask_t *online = cpupool_domain_master_cpumask(v->domain); + + /* + * We want to be able to restore hard affinity if we are trying + * setting both and changing soft affinity (which happens later, + * when hard affinity has been succesfully chaged already) fails. + */ + if ( !alloc_cpumask_var(&old_affinity) ) + return -ENOMEM; + + cpumask_copy(old_affinity, unit->cpu_hard_affinity); + + if ( !alloc_cpumask_var(&new_affinity) ) + { + free_cpumask_var(old_affinity); + return -ENOMEM; + } + + /* Undo a stuck SCHED_pin_override? */ + if ( vcpuaff->flags & XEN_VCPUAFFINITY_FORCE ) + vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE); + + ret = 0; + + /* + * We both set a new affinity and report back to the caller what + * the scheduler will be effectively using. + */ + if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD ) + { + ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity), + &vcpuaff->cpumap_hard, nr_cpu_ids); + if ( !ret ) + ret = vcpu_set_hard_affinity(v, new_affinity); + if ( ret ) + goto setvcpuaffinity_out; + + /* + * For hard affinity, what we return is the intersection of + * cpupool's online mask and the new hard affinity. + */ + cpumask_and(new_affinity, online, unit->cpu_hard_affinity); + ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard, new_affinity); + } + if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT ) + { + ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity), + &vcpuaff->cpumap_soft, nr_cpu_ids); + if ( !ret) + ret = vcpu_set_soft_affinity(v, new_affinity); + if ( ret ) + { + /* + * Since we're returning error, the caller expects nothing + * happened, so we rollback the changes to hard affinity + * (if any). + */ + if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD ) + vcpu_set_hard_affinity(v, old_affinity); + goto setvcpuaffinity_out; + } + + /* + * For soft affinity, we return the intersection between the + * new soft affinity, the cpupool's online map and the (new) + * hard affinity. 
+ */ + cpumask_and(new_affinity, new_affinity, online); + cpumask_and(new_affinity, new_affinity, unit->cpu_hard_affinity); + ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft, new_affinity); + } + + setvcpuaffinity_out: + free_cpumask_var(new_affinity); + free_cpumask_var(old_affinity); + } + else + { + if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD ) + ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard, + unit->cpu_hard_affinity); + if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT ) + ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft, + unit->cpu_soft_affinity); + } + + return ret; +} + +void domain_update_node_affinity(struct domain *d) +{ + cpumask_var_t dom_cpumask, dom_cpumask_soft; + cpumask_t *dom_affinity; + const cpumask_t *online; + struct sched_unit *unit; + unsigned int cpu; + + /* Do we have vcpus already? If not, no need to update node-affinity. */ + if ( !d->vcpu || !d->vcpu[0] ) + return; + + if ( !zalloc_cpumask_var(&dom_cpumask) ) + return; + if ( !zalloc_cpumask_var(&dom_cpumask_soft) ) + { + free_cpumask_var(dom_cpumask); + return; + } + + online = cpupool_domain_master_cpumask(d); + + spin_lock(&d->node_affinity_lock); + + /* + * If d->auto_node_affinity is true, let's compute the domain's + * node-affinity and update d->node_affinity accordingly. if false, + * just leave d->auto_node_affinity alone. + */ + if ( d->auto_node_affinity ) + { + /* + * We want the narrowest possible set of pcpus (to get the narowest + * possible set of nodes). What we need is the cpumask of where the + * domain can run (the union of the hard affinity of all its vcpus), + * and the full mask of where it would prefer to run (the union of + * the soft affinity of all its various vcpus). Let's build them. + */ + for_each_sched_unit ( d, unit ) + { + cpumask_or(dom_cpumask, dom_cpumask, unit->cpu_hard_affinity); + cpumask_or(dom_cpumask_soft, dom_cpumask_soft, + unit->cpu_soft_affinity); + } + /* Filter out non-online cpus */ + cpumask_and(dom_cpumask, dom_cpumask, online); + ASSERT(!cpumask_empty(dom_cpumask)); + /* And compute the intersection between hard, online and soft */ + cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask); + + /* + * If not empty, the intersection of hard, soft and online is the + * narrowest set we want. If empty, we fall back to hard&online. + */ + dom_affinity = cpumask_empty(dom_cpumask_soft) ? + dom_cpumask : dom_cpumask_soft; + + nodes_clear(d->node_affinity); + for_each_cpu ( cpu, dom_affinity ) + node_set(cpu_to_node(cpu), d->node_affinity); + } + + spin_unlock(&d->node_affinity_lock); + + free_cpumask_var(dom_cpumask_soft); + free_cpumask_var(dom_cpumask); +} + typedef long ret_t; #endif /* !COMPAT */ diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c index d66b541a94..3060a7144a 100644 --- a/xen/common/sched/cpupool.c +++ b/xen/common/sched/cpupool.c @@ -16,11 +16,12 @@ #include <xen/cpumask.h> #include <xen/percpu.h> #include <xen/sched.h> -#include <xen/sched-if.h> #include <xen/warning.h> #include <xen/keyhandler.h> #include <xen/cpu.h> +#include "private.h" + #define for_each_cpupool(ptr) \ for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next)) @@ -875,6 +876,16 @@ int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op) return ret; } +int cpupool_get_id(const struct domain *d) +{ + return d->cpupool ? 
d->cpupool->cpupool_id : CPUPOOLID_NONE; +} + +const cpumask_t *cpupool_valid_cpus(const struct cpupool *pool) +{ + return pool->cpu_valid; +} + void dump_runq(unsigned char key) { unsigned long flags; diff --git a/xen/common/sched/credit.c b/xen/common/sched/credit.c index aa41a3301b..4329d9df56 100644 --- a/xen/common/sched/credit.c +++ b/xen/common/sched/credit.c @@ -15,7 +15,6 @@ #include <xen/delay.h> #include <xen/event.h> #include <xen/time.h> -#include <xen/sched-if.h> #include <xen/softirq.h> #include <asm/atomic.h> #include <asm/div64.h> @@ -24,6 +23,7 @@ #include <xen/trace.h> #include <xen/err.h> +#include "private.h" /* * Locking: diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c index f7c477053c..65e8ab052e 100644 --- a/xen/common/sched/credit2.c +++ b/xen/common/sched/credit2.c @@ -18,7 +18,6 @@ #include <xen/event.h> #include <xen/time.h> #include <xen/perfc.h> -#include <xen/sched-if.h> #include <xen/softirq.h> #include <asm/div64.h> #include <xen/errno.h> @@ -26,6 +25,8 @@ #include <xen/cpu.h> #include <xen/keyhandler.h> +#include "private.h" + /* Meant only for helping developers during debugging. */ /* #define d2printk printk */ #define d2printk(x...) diff --git a/xen/common/sched/null.c b/xen/common/sched/null.c index 3f3418c9b1..b99f1e3c65 100644 --- a/xen/common/sched/null.c +++ b/xen/common/sched/null.c @@ -29,10 +29,11 @@ */ #include <xen/sched.h> -#include <xen/sched-if.h> #include <xen/softirq.h> #include <xen/trace.h> +#include "private.h" + /* * null tracing events. Check include/public/trace.h for more details. */ diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h new file mode 100644 index 0000000000..a702fd23b1 --- /dev/null +++ b/xen/common/sched/private.h @@ -0,0 +1,622 @@ +/****************************************************************************** + * Additional declarations for the generic scheduler interface. This should + * only be included by files that implement conforming schedulers. + * + * Portions by Mark Williamson are (C) 2004 Intel Research Cambridge + */ + +#ifndef __XEN_SCHED_IF_H__ +#define __XEN_SCHED_IF_H__ + +#include <xen/percpu.h> +#include <xen/err.h> +#include <xen/rcupdate.h> + +/* cpus currently in no cpupool */ +extern cpumask_t cpupool_free_cpus; + +/* Scheduler generic parameters + * */ +#define SCHED_DEFAULT_RATELIMIT_US 1000 +extern int sched_ratelimit_us; + +/* Scheduling resource mask. */ +extern cpumask_t sched_res_mask; + +/* Number of vcpus per struct sched_unit. */ +enum sched_gran { + SCHED_GRAN_cpu, + SCHED_GRAN_core, + SCHED_GRAN_socket +}; + +/* + * In order to allow a scheduler to remap the lock->cpu mapping, + * we have a per-cpu pointer, along with a pre-allocated set of + * locks. The generic schedule init code will point each schedule lock + * pointer to the schedule lock; if the scheduler wants to remap them, + * it can simply modify the schedule locks. + * + * For cache betterness, keep the actual lock in the same cache area + * as the rest of the struct. Just have the scheduler point to the + * one it wants (This may be the one right in front of it).*/ +struct sched_resource { + struct scheduler *scheduler; + struct cpupool *cpupool; + spinlock_t *schedule_lock, + _lock; + struct sched_unit *curr; + struct sched_unit *sched_unit_idle; + struct sched_unit *prev; + void *sched_priv; + struct timer s_timer; /* scheduling timer */ + + /* Cpu with lowest id in scheduling resource. 
*/ + unsigned int master_cpu; + unsigned int granularity; + cpumask_var_t cpus; /* cpus covered by this struct */ + struct rcu_head rcu; +}; + +DECLARE_PER_CPU(struct sched_resource *, sched_res); +extern rcu_read_lock_t sched_res_rculock; + +static inline struct sched_resource *get_sched_res(unsigned int cpu) +{ + return rcu_dereference(per_cpu(sched_res, cpu)); +} + +static inline void set_sched_res(unsigned int cpu, struct sched_resource *res) +{ + rcu_assign_pointer(per_cpu(sched_res, cpu), res); +} + +static inline struct sched_unit *curr_on_cpu(unsigned int cpu) +{ + return get_sched_res(cpu)->curr; +} + +static inline bool is_idle_unit(const struct sched_unit *unit) +{ + return is_idle_vcpu(unit->vcpu_list); +} + +/* Returns true if at least one vcpu of the unit is online. */ +static inline bool is_unit_online(const struct sched_unit *unit) +{ + const struct vcpu *v; + + for_each_sched_unit_vcpu ( unit, v ) + if ( is_vcpu_online(v) ) + return true; + + return false; +} + +static inline unsigned int unit_running(const struct sched_unit *unit) +{ + return unit->runstate_cnt[RUNSTATE_running]; +} + +/* Returns true if at least one vcpu of the unit is runnable. */ +static inline bool unit_runnable(const struct sched_unit *unit) +{ + const struct vcpu *v; + + for_each_sched_unit_vcpu ( unit, v ) + if ( vcpu_runnable(v) ) + return true; + + return false; +} + +static inline int vcpu_runstate_blocked(const struct vcpu *v) +{ + return (v->pause_flags & VPF_blocked) ? RUNSTATE_blocked : RUNSTATE_offline; +} + +/* + * Returns whether a sched_unit is runnable and sets new_state for each of its + * vcpus. It is mandatory to determine the new runstate for all vcpus of a unit + * without dropping the schedule lock (which happens when synchronizing the + * context switch of the vcpus of a unit) in order to avoid races with e.g. + * vcpu_sleep(). + */ +static inline bool unit_runnable_state(const struct sched_unit *unit) +{ + struct vcpu *v; + bool runnable, ret = false; + + if ( is_idle_unit(unit) ) + return true; + + for_each_sched_unit_vcpu ( unit, v ) + { + runnable = vcpu_runnable(v); + + v->new_state = runnable ? RUNSTATE_running : vcpu_runstate_blocked(v); + + if ( runnable ) + ret = true; + } + + return ret; +} + +static inline void sched_set_res(struct sched_unit *unit, + struct sched_resource *res) +{ + unsigned int cpu = cpumask_first(res->cpus); + struct vcpu *v; + + for_each_sched_unit_vcpu ( unit, v ) + { + ASSERT(cpu < nr_cpu_ids); + v->processor = cpu; + cpu = cpumask_next(cpu, res->cpus); + } + + unit->res = res; +} + +/* Return master cpu of the scheduling resource the unit is assigned to. */ +static inline unsigned int sched_unit_master(const struct sched_unit *unit) +{ + return unit->res->master_cpu; +} + +/* Set a bit in pause_flags of all vcpus of a unit. */ +static inline void sched_set_pause_flags(struct sched_unit *unit, + unsigned int bit) +{ + struct vcpu *v; + + for_each_sched_unit_vcpu ( unit, v ) + __set_bit(bit, &v->pause_flags); +} + +/* Clear a bit in pause_flags of all vcpus of a unit. */ +static inline void sched_clear_pause_flags(struct sched_unit *unit, + unsigned int bit) +{ + struct vcpu *v; + + for_each_sched_unit_vcpu ( unit, v ) + __clear_bit(bit, &v->pause_flags); +} + +/* Set a bit in pause_flags of all vcpus of a unit via atomic updates. 
*/ +static inline void sched_set_pause_flags_atomic(struct sched_unit *unit, + unsigned int bit) +{ + struct vcpu *v; + + for_each_sched_unit_vcpu ( unit, v ) + set_bit(bit, &v->pause_flags); +} + +/* Clear a bit in pause_flags of all vcpus of a unit via atomic updates. */ +static inline void sched_clear_pause_flags_atomic(struct sched_unit *unit, + unsigned int bit) +{ + struct vcpu *v; + + for_each_sched_unit_vcpu ( unit, v ) + clear_bit(bit, &v->pause_flags); +} + +static inline struct sched_unit *sched_idle_unit(unsigned int cpu) +{ + return get_sched_res(cpu)->sched_unit_idle; +} + +static inline unsigned int sched_get_resource_cpu(unsigned int cpu) +{ + return get_sched_res(cpu)->master_cpu; +} + +/* + * Scratch space, for avoiding having too many cpumask_t on the stack. + * Within each scheduler, when using the scratch mask of one pCPU: + * - the pCPU must belong to the scheduler, + * - the caller must own the per-pCPU scheduler lock (a.k.a. runqueue + * lock). + */ +DECLARE_PER_CPU(cpumask_t, cpumask_scratch); +#define cpumask_scratch (&this_cpu(cpumask_scratch)) +#define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c)) + +#define sched_lock(kind, param, cpu, irq, arg...) \ +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ +{ \ + for ( ; ; ) \ + { \ + spinlock_t *lock = get_sched_res(cpu)->schedule_lock; \ + /* \ + * v->processor may change when grabbing the lock; but \ + * per_cpu(v->processor) may also change, if changing cpu pool \ + * also changes the scheduler lock. Retry until they match. \ + * \ + * It may also be the case that v->processor may change but the \ + * lock may be the same; this will succeed in that case. \ + */ \ + spin_lock##irq(lock, ## arg); \ + if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \ + return lock; \ + spin_unlock##irq(lock, ## arg); \ + } \ +} + +#define sched_unlock(kind, param, cpu, irq, arg...) 
\ +static inline void kind##_schedule_unlock##irq(spinlock_t *lock \ + EXTRA_TYPE(arg), param) \ +{ \ + ASSERT(lock == get_sched_res(cpu)->schedule_lock); \ + spin_unlock##irq(lock, ## arg); \ +} + +#define EXTRA_TYPE(arg) +sched_lock(pcpu, unsigned int cpu, cpu, ) +sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, ) +sched_lock(pcpu, unsigned int cpu, cpu, _irq) +sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, _irq) +sched_unlock(pcpu, unsigned int cpu, cpu, ) +sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, ) +sched_unlock(pcpu, unsigned int cpu, cpu, _irq) +sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, _irq) +#undef EXTRA_TYPE + +#define EXTRA_TYPE(arg) , unsigned long arg +#define spin_unlock_irqsave spin_unlock_irqrestore +sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags) +sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, _irqsave, *flags) +#undef spin_unlock_irqsave +sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags) +sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, _irqrestore, flags) +#undef EXTRA_TYPE + +#undef sched_unlock +#undef sched_lock + +static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu) +{ + spinlock_t *lock = get_sched_res(cpu)->schedule_lock; + + if ( !spin_trylock(lock) ) + return NULL; + if ( lock == get_sched_res(cpu)->schedule_lock ) + return lock; + spin_unlock(lock); + return NULL; +} + +struct scheduler { + char *name; /* full name for this scheduler */ + char *opt_name; /* option name for this scheduler */ + unsigned int sched_id; /* ID for this scheduler */ + void *sched_data; /* global data pointer */ + + int (*global_init) (void); + + int (*init) (struct scheduler *); + void (*deinit) (struct scheduler *); + + void (*free_udata) (const struct scheduler *, void *); + void * (*alloc_udata) (const struct scheduler *, + struct sched_unit *, void *); + void (*free_pdata) (const struct scheduler *, void *, int); + void * (*alloc_pdata) (const struct scheduler *, int); + void (*init_pdata) (const struct scheduler *, void *, int); + void (*deinit_pdata) (const struct scheduler *, void *, int); + + /* Returns ERR_PTR(-err) for error, NULL for 'nothing needed'. */ + void * (*alloc_domdata) (const struct scheduler *, struct domain *); + /* Idempotent. 
*/ + void (*free_domdata) (const struct scheduler *, void *); + + spinlock_t * (*switch_sched) (struct scheduler *, unsigned int, + void *, void *); + + /* Activate / deactivate units in a cpu pool */ + void (*insert_unit) (const struct scheduler *, + struct sched_unit *); + void (*remove_unit) (const struct scheduler *, + struct sched_unit *); + + void (*sleep) (const struct scheduler *, + struct sched_unit *); + void (*wake) (const struct scheduler *, + struct sched_unit *); + void (*yield) (const struct scheduler *, + struct sched_unit *); + void (*context_saved) (const struct scheduler *, + struct sched_unit *); + + void (*do_schedule) (const struct scheduler *, + struct sched_unit *, s_time_t, + bool tasklet_work_scheduled); + + struct sched_resource *(*pick_resource)(const struct scheduler *, + const struct sched_unit *); + void (*migrate) (const struct scheduler *, + struct sched_unit *, unsigned int); + int (*adjust) (const struct scheduler *, struct domain *, + struct xen_domctl_scheduler_op *); + void (*adjust_affinity)(const struct scheduler *, + struct sched_unit *, + const struct cpumask *, + const struct cpumask *); + int (*adjust_global) (const struct scheduler *, + struct xen_sysctl_scheduler_op *); + void (*dump_settings) (const struct scheduler *); + void (*dump_cpu_state) (const struct scheduler *, int); +}; + +static inline int sched_init(struct scheduler *s) +{ + return s->init(s); +} + +static inline void sched_deinit(struct scheduler *s) +{ + s->deinit(s); +} + +static inline spinlock_t *sched_switch_sched(struct scheduler *s, + unsigned int cpu, + void *pdata, void *vdata) +{ + return s->switch_sched(s, cpu, pdata, vdata); +} + +static inline void sched_dump_settings(const struct scheduler *s) +{ + if ( s->dump_settings ) + s->dump_settings(s); +} + +static inline void sched_dump_cpu_state(const struct scheduler *s, int cpu) +{ + if ( s->dump_cpu_state ) + s->dump_cpu_state(s, cpu); +} + +static inline void *sched_alloc_domdata(const struct scheduler *s, + struct domain *d) +{ + return s->alloc_domdata ? s->alloc_domdata(s, d) : NULL; +} + +static inline void sched_free_domdata(const struct scheduler *s, + void *data) +{ + ASSERT(s->free_domdata || !data); + if ( s->free_domdata ) + s->free_domdata(s, data); +} + +static inline void *sched_alloc_pdata(const struct scheduler *s, int cpu) +{ + return s->alloc_pdata ? 
s->alloc_pdata(s, cpu) : NULL; +} + +static inline void sched_free_pdata(const struct scheduler *s, void *data, + int cpu) +{ + ASSERT(s->free_pdata || !data); + if ( s->free_pdata ) + s->free_pdata(s, data, cpu); +} + +static inline void sched_init_pdata(const struct scheduler *s, void *data, + int cpu) +{ + if ( s->init_pdata ) + s->init_pdata(s, data, cpu); +} + +static inline void sched_deinit_pdata(const struct scheduler *s, void *data, + int cpu) +{ + if ( s->deinit_pdata ) + s->deinit_pdata(s, data, cpu); +} + +static inline void *sched_alloc_udata(const struct scheduler *s, + struct sched_unit *unit, void *dom_data) +{ + return s->alloc_udata(s, unit, dom_data); +} + +static inline void sched_free_udata(const struct scheduler *s, void *data) +{ + s->free_udata(s, data); +} + +static inline void sched_insert_unit(const struct scheduler *s, + struct sched_unit *unit) +{ + if ( s->insert_unit ) + s->insert_unit(s, unit); +} + +static inline void sched_remove_unit(const struct scheduler *s, + struct sched_unit *unit) +{ + if ( s->remove_unit ) + s->remove_unit(s, unit); +} + +static inline void sched_sleep(const struct scheduler *s, + struct sched_unit *unit) +{ + if ( s->sleep ) + s->sleep(s, unit); +} + +static inline void sched_wake(const struct scheduler *s, + struct sched_unit *unit) +{ + if ( s->wake ) + s->wake(s, unit); +} + +static inline void sched_yield(const struct scheduler *s, + struct sched_unit *unit) +{ + if ( s->yield ) + s->yield(s, unit); +} + +static inline void sched_context_saved(const struct scheduler *s, + struct sched_unit *unit) +{ + if ( s->context_saved ) + s->context_saved(s, unit); +} + +static inline void sched_migrate(const struct scheduler *s, + struct sched_unit *unit, unsigned int cpu) +{ + if ( s->migrate ) + s->migrate(s, unit, cpu); + else + sched_set_res(unit, get_sched_res(cpu)); +} + +static inline struct sched_resource *sched_pick_resource( + const struct scheduler *s, const struct sched_unit *unit) +{ + return s->pick_resource(s, unit); +} + +static inline void sched_adjust_affinity(const struct scheduler *s, + struct sched_unit *unit, + const cpumask_t *hard, + const cpumask_t *soft) +{ + if ( s->adjust_affinity ) + s->adjust_affinity(s, unit, hard, soft); +} + +static inline int sched_adjust_dom(const struct scheduler *s, struct domain *d, + struct xen_domctl_scheduler_op *op) +{ + return s->adjust ? s->adjust(s, d, op) : 0; +} + +static inline int sched_adjust_cpupool(const struct scheduler *s, + struct xen_sysctl_scheduler_op *op) +{ + return s->adjust_global ? s->adjust_global(s, op) : 0; +} + +static inline void sched_unit_pause_nosync(const struct sched_unit *unit) +{ + struct vcpu *v; + + for_each_sched_unit_vcpu ( unit, v ) + vcpu_pause_nosync(v); +} + +static inline void sched_unit_unpause(const struct sched_unit *unit) +{ + struct vcpu *v; + + for_each_sched_unit_vcpu ( unit, v ) + vcpu_unpause(v); +} + +#define REGISTER_SCHEDULER(x) static const struct scheduler *x##_entry \ + __used_section(".data.schedulers") = &x; + +struct cpupool +{ + int cpupool_id; + unsigned int n_dom; + cpumask_var_t cpu_valid; /* all cpus assigned to pool */ + cpumask_var_t res_valid; /* all scheduling resources of pool */ + struct cpupool *next; + struct scheduler *sched; + atomic_t refcnt; + enum sched_gran gran; +}; + +static inline cpumask_t *cpupool_domain_master_cpumask(const struct domain *d) +{ + /* + * d->cpupool is NULL only for the idle domain, and no one should + * be interested in calling this for the idle domain. 
+ */ + ASSERT(d->cpupool != NULL); + return d->cpupool->res_valid; +} + +unsigned int cpupool_get_granularity(const struct cpupool *c); + +/* + * Hard and soft affinity load balancing. + * + * Idea is each vcpu has some pcpus that it prefers, some that it does not + * prefer but is OK with, and some that it cannot run on at all. The first + * set of pcpus are the ones that are both in the soft affinity *and* in the + * hard affinity; the second set of pcpus are the ones that are in the hard + * affinity but *not* in the soft affinity; the third set of pcpus are the + * ones that are not in the hard affinity. + * + * We implement a two step balancing logic. Basically, every time there is + * the need to decide where to run a vcpu, we first check the soft affinity + * (well, actually, the && between soft and hard affinity), to see if we can + * send it where it prefers to (and can) run on. However, if the first step + * does not find any suitable and free pcpu, we fall back checking the hard + * affinity. + */ +#define BALANCE_SOFT_AFFINITY 0 +#define BALANCE_HARD_AFFINITY 1 + +#define for_each_affinity_balance_step(step) \ + for ( (step) = 0; (step) <= BALANCE_HARD_AFFINITY; (step)++ ) + +/* + * Hard affinity balancing is always necessary and must never be skipped. + * But soft affinity need only be considered when it has a functionally + * different effect than other constraints (such as hard affinity, cpus + * online, or cpupools). + * + * Soft affinity only needs to be considered if: + * * The cpus in the cpupool are not a subset of soft affinity + * * The hard affinity is not a subset of soft affinity + * * There is an overlap between the soft and hard affinity masks + */ +static inline int has_soft_affinity(const struct sched_unit *unit) +{ + return unit->soft_aff_effective && + !cpumask_subset(cpupool_domain_master_cpumask(unit->domain), + unit->cpu_soft_affinity); +} + +/* + * This function copies in mask the cpumask that should be used for a + * particular affinity balancing step. For the soft affinity one, the pcpus + * that are not part of vc's hard affinity are filtered out from the result, + * to avoid running a vcpu where it would like, but is not allowed to! 
+ */ +static inline void +affinity_balance_cpumask(const struct sched_unit *unit, int step, + cpumask_t *mask) +{ + if ( step == BALANCE_SOFT_AFFINITY ) + { + cpumask_and(mask, unit->cpu_soft_affinity, unit->cpu_hard_affinity); + + if ( unlikely(cpumask_empty(mask)) ) + cpumask_copy(mask, unit->cpu_hard_affinity); + } + else /* step == BALANCE_HARD_AFFINITY */ + cpumask_copy(mask, unit->cpu_hard_affinity); +} + +void sched_rm_cpu(unsigned int cpu); +const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu); + +#endif /* __XEN_SCHED_IF_H__ */ diff --git a/xen/common/sched/rt.c b/xen/common/sched/rt.c index c40a7e4990..a7125aef15 100644 --- a/xen/common/sched/rt.c +++ b/xen/common/sched/rt.c @@ -20,7 +20,6 @@ #include <xen/time.h> #include <xen/timer.h> #include <xen/perfc.h> -#include <xen/sched-if.h> #include <xen/softirq.h> #include <asm/atomic.h> #include <xen/errno.h> @@ -31,6 +30,8 @@ #include <xen/err.h> #include <xen/guest_access.h> +#include "private.h" + /* * TODO: * diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h index 1cb205d977..7e51d361de 100644 --- a/xen/include/xen/domain.h +++ b/xen/include/xen/domain.h @@ -27,6 +27,9 @@ struct xen_domctl_getdomaininfo; void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info); void arch_get_domain_info(const struct domain *d, struct xen_domctl_getdomaininfo *info); +int xenctl_bitmap_to_bitmap(unsigned long *bitmap, + const struct xenctl_bitmap *xenctl_bitmap, + unsigned int nbits); /* * Arch-specifics. diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h deleted file mode 100644 index b0ac54e63d..0000000000 --- a/xen/include/xen/sched-if.h +++ /dev/null @@ -1,625 +0,0 @@ -/****************************************************************************** - * Additional declarations for the generic scheduler interface. This should - * only be included by files that implement conforming schedulers. - * - * Portions by Mark Williamson are (C) 2004 Intel Research Cambridge - */ - -#ifndef __XEN_SCHED_IF_H__ -#define __XEN_SCHED_IF_H__ - -#include <xen/percpu.h> -#include <xen/err.h> -#include <xen/rcupdate.h> - -/* A global pointer to the initial cpupool (POOL0). */ -extern struct cpupool *cpupool0; - -/* cpus currently in no cpupool */ -extern cpumask_t cpupool_free_cpus; - -/* Scheduler generic parameters - * */ -#define SCHED_DEFAULT_RATELIMIT_US 1000 -extern int sched_ratelimit_us; - -/* Scheduling resource mask. */ -extern cpumask_t sched_res_mask; - -/* Number of vcpus per struct sched_unit. */ -enum sched_gran { - SCHED_GRAN_cpu, - SCHED_GRAN_core, - SCHED_GRAN_socket -}; - -/* - * In order to allow a scheduler to remap the lock->cpu mapping, - * we have a per-cpu pointer, along with a pre-allocated set of - * locks. The generic schedule init code will point each schedule lock - * pointer to the schedule lock; if the scheduler wants to remap them, - * it can simply modify the schedule locks. - * - * For cache betterness, keep the actual lock in the same cache area - * as the rest of the struct. Just have the scheduler point to the - * one it wants (This may be the one right in front of it).*/ -struct sched_resource { - struct scheduler *scheduler; - struct cpupool *cpupool; - spinlock_t *schedule_lock, - _lock; - struct sched_unit *curr; - struct sched_unit *sched_unit_idle; - struct sched_unit *prev; - void *sched_priv; - struct timer s_timer; /* scheduling timer */ - - /* Cpu with lowest id in scheduling resource. 
*/ - unsigned int master_cpu; - unsigned int granularity; - cpumask_var_t cpus; /* cpus covered by this struct */ - struct rcu_head rcu; -}; - -DECLARE_PER_CPU(struct sched_resource *, sched_res); -extern rcu_read_lock_t sched_res_rculock; - -static inline struct sched_resource *get_sched_res(unsigned int cpu) -{ - return rcu_dereference(per_cpu(sched_res, cpu)); -} - -static inline void set_sched_res(unsigned int cpu, struct sched_resource *res) -{ - rcu_assign_pointer(per_cpu(sched_res, cpu), res); -} - -static inline struct sched_unit *curr_on_cpu(unsigned int cpu) -{ - return get_sched_res(cpu)->curr; -} - -static inline bool is_idle_unit(const struct sched_unit *unit) -{ - return is_idle_vcpu(unit->vcpu_list); -} - -/* Returns true if at least one vcpu of the unit is online. */ -static inline bool is_unit_online(const struct sched_unit *unit) -{ - const struct vcpu *v; - - for_each_sched_unit_vcpu ( unit, v ) - if ( is_vcpu_online(v) ) - return true; - - return false; -} - -static inline unsigned int unit_running(const struct sched_unit *unit) -{ - return unit->runstate_cnt[RUNSTATE_running]; -} - -/* Returns true if at least one vcpu of the unit is runnable. */ -static inline bool unit_runnable(const struct sched_unit *unit) -{ - const struct vcpu *v; - - for_each_sched_unit_vcpu ( unit, v ) - if ( vcpu_runnable(v) ) - return true; - - return false; -} - -static inline int vcpu_runstate_blocked(const struct vcpu *v) -{ - return (v->pause_flags & VPF_blocked) ? RUNSTATE_blocked : RUNSTATE_offline; -} - -/* - * Returns whether a sched_unit is runnable and sets new_state for each of its - * vcpus. It is mandatory to determine the new runstate for all vcpus of a unit - * without dropping the schedule lock (which happens when synchronizing the - * context switch of the vcpus of a unit) in order to avoid races with e.g. - * vcpu_sleep(). - */ -static inline bool unit_runnable_state(const struct sched_unit *unit) -{ - struct vcpu *v; - bool runnable, ret = false; - - if ( is_idle_unit(unit) ) - return true; - - for_each_sched_unit_vcpu ( unit, v ) - { - runnable = vcpu_runnable(v); - - v->new_state = runnable ? RUNSTATE_running : vcpu_runstate_blocked(v); - - if ( runnable ) - ret = true; - } - - return ret; -} - -static inline void sched_set_res(struct sched_unit *unit, - struct sched_resource *res) -{ - unsigned int cpu = cpumask_first(res->cpus); - struct vcpu *v; - - for_each_sched_unit_vcpu ( unit, v ) - { - ASSERT(cpu < nr_cpu_ids); - v->processor = cpu; - cpu = cpumask_next(cpu, res->cpus); - } - - unit->res = res; -} - -/* Return master cpu of the scheduling resource the unit is assigned to. */ -static inline unsigned int sched_unit_master(const struct sched_unit *unit) -{ - return unit->res->master_cpu; -} - -/* Set a bit in pause_flags of all vcpus of a unit. */ -static inline void sched_set_pause_flags(struct sched_unit *unit, - unsigned int bit) -{ - struct vcpu *v; - - for_each_sched_unit_vcpu ( unit, v ) - __set_bit(bit, &v->pause_flags); -} - -/* Clear a bit in pause_flags of all vcpus of a unit. */ -static inline void sched_clear_pause_flags(struct sched_unit *unit, - unsigned int bit) -{ - struct vcpu *v; - - for_each_sched_unit_vcpu ( unit, v ) - __clear_bit(bit, &v->pause_flags); -} - -/* Set a bit in pause_flags of all vcpus of a unit via atomic updates. 
*/ -static inline void sched_set_pause_flags_atomic(struct sched_unit *unit, - unsigned int bit) -{ - struct vcpu *v; - - for_each_sched_unit_vcpu ( unit, v ) - set_bit(bit, &v->pause_flags); -} - -/* Clear a bit in pause_flags of all vcpus of a unit via atomic updates. */ -static inline void sched_clear_pause_flags_atomic(struct sched_unit *unit, - unsigned int bit) -{ - struct vcpu *v; - - for_each_sched_unit_vcpu ( unit, v ) - clear_bit(bit, &v->pause_flags); -} - -static inline struct sched_unit *sched_idle_unit(unsigned int cpu) -{ - return get_sched_res(cpu)->sched_unit_idle; -} - -static inline unsigned int sched_get_resource_cpu(unsigned int cpu) -{ - return get_sched_res(cpu)->master_cpu; -} - -/* - * Scratch space, for avoiding having too many cpumask_t on the stack. - * Within each scheduler, when using the scratch mask of one pCPU: - * - the pCPU must belong to the scheduler, - * - the caller must own the per-pCPU scheduler lock (a.k.a. runqueue - * lock). - */ -DECLARE_PER_CPU(cpumask_t, cpumask_scratch); -#define cpumask_scratch (&this_cpu(cpumask_scratch)) -#define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c)) - -#define sched_lock(kind, param, cpu, irq, arg...) \ -static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ -{ \ - for ( ; ; ) \ - { \ - spinlock_t *lock = get_sched_res(cpu)->schedule_lock; \ - /* \ - * v->processor may change when grabbing the lock; but \ - * per_cpu(v->processor) may also change, if changing cpu pool \ - * also changes the scheduler lock. Retry until they match. \ - * \ - * It may also be the case that v->processor may change but the \ - * lock may be the same; this will succeed in that case. \ - */ \ - spin_lock##irq(lock, ## arg); \ - if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \ - return lock; \ - spin_unlock##irq(lock, ## arg); \ - } \ -} - -#define sched_unlock(kind, param, cpu, irq, arg...) 
\ -static inline void kind##_schedule_unlock##irq(spinlock_t *lock \ - EXTRA_TYPE(arg), param) \ -{ \ - ASSERT(lock == get_sched_res(cpu)->schedule_lock); \ - spin_unlock##irq(lock, ## arg); \ -} - -#define EXTRA_TYPE(arg) -sched_lock(pcpu, unsigned int cpu, cpu, ) -sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, ) -sched_lock(pcpu, unsigned int cpu, cpu, _irq) -sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, _irq) -sched_unlock(pcpu, unsigned int cpu, cpu, ) -sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, ) -sched_unlock(pcpu, unsigned int cpu, cpu, _irq) -sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, _irq) -#undef EXTRA_TYPE - -#define EXTRA_TYPE(arg) , unsigned long arg -#define spin_unlock_irqsave spin_unlock_irqrestore -sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags) -sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, _irqsave, *flags) -#undef spin_unlock_irqsave -sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags) -sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, _irqrestore, flags) -#undef EXTRA_TYPE - -#undef sched_unlock -#undef sched_lock - -static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu) -{ - spinlock_t *lock = get_sched_res(cpu)->schedule_lock; - - if ( !spin_trylock(lock) ) - return NULL; - if ( lock == get_sched_res(cpu)->schedule_lock ) - return lock; - spin_unlock(lock); - return NULL; -} - -struct scheduler { - char *name; /* full name for this scheduler */ - char *opt_name; /* option name for this scheduler */ - unsigned int sched_id; /* ID for this scheduler */ - void *sched_data; /* global data pointer */ - - int (*global_init) (void); - - int (*init) (struct scheduler *); - void (*deinit) (struct scheduler *); - - void (*free_udata) (const struct scheduler *, void *); - void * (*alloc_udata) (const struct scheduler *, - struct sched_unit *, void *); - void (*free_pdata) (const struct scheduler *, void *, int); - void * (*alloc_pdata) (const struct scheduler *, int); - void (*init_pdata) (const struct scheduler *, void *, int); - void (*deinit_pdata) (const struct scheduler *, void *, int); - - /* Returns ERR_PTR(-err) for error, NULL for 'nothing needed'. */ - void * (*alloc_domdata) (const struct scheduler *, struct domain *); - /* Idempotent. 
*/ - void (*free_domdata) (const struct scheduler *, void *); - - spinlock_t * (*switch_sched) (struct scheduler *, unsigned int, - void *, void *); - - /* Activate / deactivate units in a cpu pool */ - void (*insert_unit) (const struct scheduler *, - struct sched_unit *); - void (*remove_unit) (const struct scheduler *, - struct sched_unit *); - - void (*sleep) (const struct scheduler *, - struct sched_unit *); - void (*wake) (const struct scheduler *, - struct sched_unit *); - void (*yield) (const struct scheduler *, - struct sched_unit *); - void (*context_saved) (const struct scheduler *, - struct sched_unit *); - - void (*do_schedule) (const struct scheduler *, - struct sched_unit *, s_time_t, - bool tasklet_work_scheduled); - - struct sched_resource *(*pick_resource)(const struct scheduler *, - const struct sched_unit *); - void (*migrate) (const struct scheduler *, - struct sched_unit *, unsigned int); - int (*adjust) (const struct scheduler *, struct domain *, - struct xen_domctl_scheduler_op *); - void (*adjust_affinity)(const struct scheduler *, - struct sched_unit *, - const struct cpumask *, - const struct cpumask *); - int (*adjust_global) (const struct scheduler *, - struct xen_sysctl_scheduler_op *); - void (*dump_settings) (const struct scheduler *); - void (*dump_cpu_state) (const struct scheduler *, int); -}; - -static inline int sched_init(struct scheduler *s) -{ - return s->init(s); -} - -static inline void sched_deinit(struct scheduler *s) -{ - s->deinit(s); -} - -static inline spinlock_t *sched_switch_sched(struct scheduler *s, - unsigned int cpu, - void *pdata, void *vdata) -{ - return s->switch_sched(s, cpu, pdata, vdata); -} - -static inline void sched_dump_settings(const struct scheduler *s) -{ - if ( s->dump_settings ) - s->dump_settings(s); -} - -static inline void sched_dump_cpu_state(const struct scheduler *s, int cpu) -{ - if ( s->dump_cpu_state ) - s->dump_cpu_state(s, cpu); -} - -static inline void *sched_alloc_domdata(const struct scheduler *s, - struct domain *d) -{ - return s->alloc_domdata ? s->alloc_domdata(s, d) : NULL; -} - -static inline void sched_free_domdata(const struct scheduler *s, - void *data) -{ - ASSERT(s->free_domdata || !data); - if ( s->free_domdata ) - s->free_domdata(s, data); -} - -static inline void *sched_alloc_pdata(const struct scheduler *s, int cpu) -{ - return s->alloc_pdata ? 
s->alloc_pdata(s, cpu) : NULL; -} - -static inline void sched_free_pdata(const struct scheduler *s, void *data, - int cpu) -{ - ASSERT(s->free_pdata || !data); - if ( s->free_pdata ) - s->free_pdata(s, data, cpu); -} - -static inline void sched_init_pdata(const struct scheduler *s, void *data, - int cpu) -{ - if ( s->init_pdata ) - s->init_pdata(s, data, cpu); -} - -static inline void sched_deinit_pdata(const struct scheduler *s, void *data, - int cpu) -{ - if ( s->deinit_pdata ) - s->deinit_pdata(s, data, cpu); -} - -static inline void *sched_alloc_udata(const struct scheduler *s, - struct sched_unit *unit, void *dom_data) -{ - return s->alloc_udata(s, unit, dom_data); -} - -static inline void sched_free_udata(const struct scheduler *s, void *data) -{ - s->free_udata(s, data); -} - -static inline void sched_insert_unit(const struct scheduler *s, - struct sched_unit *unit) -{ - if ( s->insert_unit ) - s->insert_unit(s, unit); -} - -static inline void sched_remove_unit(const struct scheduler *s, - struct sched_unit *unit) -{ - if ( s->remove_unit ) - s->remove_unit(s, unit); -} - -static inline void sched_sleep(const struct scheduler *s, - struct sched_unit *unit) -{ - if ( s->sleep ) - s->sleep(s, unit); -} - -static inline void sched_wake(const struct scheduler *s, - struct sched_unit *unit) -{ - if ( s->wake ) - s->wake(s, unit); -} - -static inline void sched_yield(const struct scheduler *s, - struct sched_unit *unit) -{ - if ( s->yield ) - s->yield(s, unit); -} - -static inline void sched_context_saved(const struct scheduler *s, - struct sched_unit *unit) -{ - if ( s->context_saved ) - s->context_saved(s, unit); -} - -static inline void sched_migrate(const struct scheduler *s, - struct sched_unit *unit, unsigned int cpu) -{ - if ( s->migrate ) - s->migrate(s, unit, cpu); - else - sched_set_res(unit, get_sched_res(cpu)); -} - -static inline struct sched_resource *sched_pick_resource( - const struct scheduler *s, const struct sched_unit *unit) -{ - return s->pick_resource(s, unit); -} - -static inline void sched_adjust_affinity(const struct scheduler *s, - struct sched_unit *unit, - const cpumask_t *hard, - const cpumask_t *soft) -{ - if ( s->adjust_affinity ) - s->adjust_affinity(s, unit, hard, soft); -} - -static inline int sched_adjust_dom(const struct scheduler *s, struct domain *d, - struct xen_domctl_scheduler_op *op) -{ - return s->adjust ? s->adjust(s, d, op) : 0; -} - -static inline int sched_adjust_cpupool(const struct scheduler *s, - struct xen_sysctl_scheduler_op *op) -{ - return s->adjust_global ? s->adjust_global(s, op) : 0; -} - -static inline void sched_unit_pause_nosync(const struct sched_unit *unit) -{ - struct vcpu *v; - - for_each_sched_unit_vcpu ( unit, v ) - vcpu_pause_nosync(v); -} - -static inline void sched_unit_unpause(const struct sched_unit *unit) -{ - struct vcpu *v; - - for_each_sched_unit_vcpu ( unit, v ) - vcpu_unpause(v); -} - -#define REGISTER_SCHEDULER(x) static const struct scheduler *x##_entry \ - __used_section(".data.schedulers") = &x; - -struct cpupool -{ - int cpupool_id; - unsigned int n_dom; - cpumask_var_t cpu_valid; /* all cpus assigned to pool */ - cpumask_var_t res_valid; /* all scheduling resources of pool */ - struct cpupool *next; - struct scheduler *sched; - atomic_t refcnt; - enum sched_gran gran; -}; - -static inline cpumask_t *cpupool_domain_master_cpumask(const struct domain *d) -{ - /* - * d->cpupool is NULL only for the idle domain, and no one should - * be interested in calling this for the idle domain. 
- */ - ASSERT(d->cpupool != NULL); - return d->cpupool->res_valid; -} - -unsigned int cpupool_get_granularity(const struct cpupool *c); - -/* - * Hard and soft affinity load balancing. - * - * Idea is each vcpu has some pcpus that it prefers, some that it does not - * prefer but is OK with, and some that it cannot run on at all. The first - * set of pcpus are the ones that are both in the soft affinity *and* in the - * hard affinity; the second set of pcpus are the ones that are in the hard - * affinity but *not* in the soft affinity; the third set of pcpus are the - * ones that are not in the hard affinity. - * - * We implement a two step balancing logic. Basically, every time there is - * the need to decide where to run a vcpu, we first check the soft affinity - * (well, actually, the && between soft and hard affinity), to see if we can - * send it where it prefers to (and can) run on. However, if the first step - * does not find any suitable and free pcpu, we fall back checking the hard - * affinity. - */ -#define BALANCE_SOFT_AFFINITY 0 -#define BALANCE_HARD_AFFINITY 1 - -#define for_each_affinity_balance_step(step) \ - for ( (step) = 0; (step) <= BALANCE_HARD_AFFINITY; (step)++ ) - -/* - * Hard affinity balancing is always necessary and must never be skipped. - * But soft affinity need only be considered when it has a functionally - * different effect than other constraints (such as hard affinity, cpus - * online, or cpupools). - * - * Soft affinity only needs to be considered if: - * * The cpus in the cpupool are not a subset of soft affinity - * * The hard affinity is not a subset of soft affinity - * * There is an overlap between the soft and hard affinity masks - */ -static inline int has_soft_affinity(const struct sched_unit *unit) -{ - return unit->soft_aff_effective && - !cpumask_subset(cpupool_domain_master_cpumask(unit->domain), - unit->cpu_soft_affinity); -} - -/* - * This function copies in mask the cpumask that should be used for a - * particular affinity balancing step. For the soft affinity one, the pcpus - * that are not part of vc's hard affinity are filtered out from the result, - * to avoid running a vcpu where it would like, but is not allowed to! - */ -static inline void -affinity_balance_cpumask(const struct sched_unit *unit, int step, - cpumask_t *mask) -{ - if ( step == BALANCE_SOFT_AFFINITY ) - { - cpumask_and(mask, unit->cpu_soft_affinity, unit->cpu_hard_affinity); - - if ( unlikely(cpumask_empty(mask)) ) - cpumask_copy(mask, unit->cpu_hard_affinity); - } - else /* step == BALANCE_HARD_AFFINITY */ - cpumask_copy(mask, unit->cpu_hard_affinity); -} - -void sched_rm_cpu(unsigned int cpu); -const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu); - -#endif /* __XEN_SCHED_IF_H__ */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index cc942a3621..2d8ff366bc 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -50,6 +50,9 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t); /* A global pointer to the hardware domain (usually DOM0). */ extern struct domain *hardware_domain; +/* A global pointer to the initial cpupool (POOL0). 
*/ +extern struct cpupool *cpupool0; + #ifdef CONFIG_LATE_HWDOM extern domid_t hardware_domid; #else @@ -931,6 +934,8 @@ int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason); int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity); int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity); void restore_vcpu_affinity(struct domain *d); +int vcpu_affinity_domctl(struct domain *d, uint32_t cmd, + struct xen_domctl_vcpuaffinity *vcpuaff); void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate); uint64_t get_cpu_idle_time(unsigned int cpu); @@ -1068,6 +1073,8 @@ int cpupool_add_domain(struct domain *d, int poolid); void cpupool_rm_domain(struct domain *d); int cpupool_move_domain(struct domain *d, struct cpupool *c); int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op); +int cpupool_get_id(const struct domain *d); +const cpumask_t *cpupool_valid_cpus(const struct cpupool *pool); void schedule_dump(struct cpupool *c); extern void dump_runq(unsigned char key); -- generated by git-patchbot for /home/xen/git/xen.git#staging _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/xen-changelog
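As a minimal illustration of the interface left visible outside the scheduler after this change, the sketch below (hypothetical, not part of the patch) relies only on what <xen/sched.h> now exports: cpupool0, cpupool_get_id() and cpupool_valid_cpus(). The example_* helpers are invented names; they simply mirror the dom0_build.c and domctl.c hunks above.

/*
 * Hypothetical consumers outside xen/common/sched/ -- they see only the
 * accessors exported via <xen/sched.h>, not struct cpupool itself.
 */
#include <xen/sched.h>    /* cpupool0, cpupool_get_id(), cpupool_valid_cpus() */
#include <xen/cpumask.h>

/* How many CPUs does Pool-0 currently hold? (cf. dom0_max_vcpus() above) */
static unsigned int example_pool0_cpu_count(void)
{
    return cpumask_weight(cpupool_valid_cpus(cpupool0));
}

/* Report a domain's pool, replacing the old open-coded
 * "d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE". */
static int example_domain_pool_id(const struct domain *d)
{
    return cpupool_get_id(d);
}

Scheduler implementations themselves (arinc653, credit, credit2, null, rt) instead drop #include <xen/sched-if.h> and pull in the new local "private.h", which keeps struct cpupool, struct scheduler and the scheduling lock helpers private to xen/common/sched/.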