[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v5 14/19] xen/sched: protect scheduling resource via rcu
In order to be able to move cpus to cpupools with core scheduling active, it is mandatory to merge multiple cpus into one scheduling resource or to split a scheduling resource with multiple cpus in it into multiple scheduling resources. This in turn requires modifying the cpu <-> scheduling resource relation. In order to be able to free unused resources, protect struct sched_resource via RCU. This ensures there are no users left when freeing such a resource. Signed-off-by: Juergen Gross <jgross@xxxxxxxx> Reviewed-by: Dario Faggioli <dfaggioli@xxxxxxxx> --- V1: new patch --- xen/common/cpupool.c | 4 + xen/common/schedule.c | 187 ++++++++++++++++++++++++++++++++++++++++----- xen/include/xen/sched-if.h | 7 +- 3 files changed, 178 insertions(+), 20 deletions(-) diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c index 02825e779d..7228ca84b4 100644 --- a/xen/common/cpupool.c +++ b/xen/common/cpupool.c @@ -511,8 +511,10 @@ static int cpupool_cpu_add(unsigned int cpu) * (or unplugging would have failed) and that is the default behavior * anyway. */ + rcu_read_lock(&sched_res_rculock); get_sched_res(cpu)->cpupool = NULL; ret = cpupool_assign_cpu_locked(cpupool0, cpu); + rcu_read_unlock(&sched_res_rculock); spin_unlock(&cpupool_lock); @@ -597,7 +599,9 @@ static void cpupool_cpu_remove_forced(unsigned int cpu) } } + rcu_read_lock(&sched_res_rculock); sched_rm_cpu(cpu); + rcu_read_unlock(&sched_res_rculock); } /* diff --git a/xen/common/schedule.c b/xen/common/schedule.c index a96fc82282..1f23bf0e83 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -77,6 +77,7 @@ static void poll_timer_fn(void *data); /* This is global for now so that private implementations can reach it */ DEFINE_PER_CPU_READ_MOSTLY(struct sched_resource *, sched_res); static DEFINE_PER_CPU_READ_MOSTLY(unsigned int, sched_res_idx); +DEFINE_RCU_READ_LOCK(sched_res_rculock); /* Scratch space for cpumasks. 
*/ DEFINE_PER_CPU(cpumask_t, cpumask_scratch); @@ -300,10 +301,12 @@ void sched_guest_idle(void (*idle) (void), unsigned int cpu) void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate) { - spinlock_t *lock = likely(v == current) - ? NULL : unit_schedule_lock_irq(v->sched_unit); + spinlock_t *lock; s_time_t delta; + rcu_read_lock(&sched_res_rculock); + + lock = likely(v == current) ? NULL : unit_schedule_lock_irq(v->sched_unit); memcpy(runstate, &v->runstate, sizeof(*runstate)); delta = NOW() - runstate->state_entry_time; if ( delta > 0 ) @@ -311,6 +314,8 @@ void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate) if ( unlikely(lock != NULL) ) unit_schedule_unlock_irq(lock, v->sched_unit); + + rcu_read_unlock(&sched_res_rculock); } uint64_t get_cpu_idle_time(unsigned int cpu) @@ -522,6 +527,8 @@ int sched_init_vcpu(struct vcpu *v) return 0; } + rcu_read_lock(&sched_res_rculock); + /* The first vcpu of an unit can be set via sched_set_res(). */ sched_set_res(unit, get_sched_res(processor)); @@ -529,6 +536,7 @@ int sched_init_vcpu(struct vcpu *v) if ( unit->priv == NULL ) { sched_free_unit(unit, v); + rcu_read_unlock(&sched_res_rculock); return 1; } @@ -555,6 +563,8 @@ int sched_init_vcpu(struct vcpu *v) sched_insert_unit(dom_scheduler(d), unit); } + rcu_read_unlock(&sched_res_rculock); + return 0; } @@ -583,6 +593,7 @@ int sched_move_domain(struct domain *d, struct cpupool *c) struct scheduler *old_ops; void *old_domdata; unsigned int gran = cpupool_get_granularity(c); + int ret = 0; for_each_vcpu ( d, v ) { @@ -590,15 +601,21 @@ int sched_move_domain(struct domain *d, struct cpupool *c) return -EBUSY; } + rcu_read_lock(&sched_res_rculock); + domdata = sched_alloc_domdata(c->sched, d); if ( IS_ERR(domdata) ) - return PTR_ERR(domdata); + { + ret = PTR_ERR(domdata); + goto out; + } unit_priv = xzalloc_array(void *, DIV_ROUND_UP(d->max_vcpus, gran)); if ( unit_priv == NULL ) { sched_free_domdata(c->sched, domdata); - return 
-ENOMEM; + ret = -ENOMEM; + goto out; } unit_idx = 0; @@ -611,7 +628,8 @@ int sched_move_domain(struct domain *d, struct cpupool *c) sched_free_udata(c->sched, unit_priv[unit_idx]); xfree(unit_priv); sched_free_domdata(c->sched, domdata); - return -ENOMEM; + ret = -ENOMEM; + goto out; } unit_idx++; } @@ -677,7 +695,10 @@ int sched_move_domain(struct domain *d, struct cpupool *c) xfree(unit_priv); - return 0; +out: + rcu_read_unlock(&sched_res_rculock); + + return ret; } void sched_destroy_vcpu(struct vcpu *v) @@ -695,9 +716,13 @@ void sched_destroy_vcpu(struct vcpu *v) */ if ( unit->vcpu_list == v ) { + rcu_read_lock(&sched_res_rculock); + sched_remove_unit(vcpu_scheduler(v), unit); sched_free_udata(vcpu_scheduler(v), unit->priv); sched_free_unit(unit, v); + + rcu_read_unlock(&sched_res_rculock); } } @@ -715,7 +740,12 @@ int sched_init_domain(struct domain *d, int poolid) SCHED_STAT_CRANK(dom_init); TRACE_1D(TRC_SCHED_DOM_ADD, d->domain_id); + rcu_read_lock(&sched_res_rculock); + sdom = sched_alloc_domdata(dom_scheduler(d), d); + + rcu_read_unlock(&sched_res_rculock); + if ( IS_ERR(sdom) ) return PTR_ERR(sdom); @@ -733,9 +763,13 @@ void sched_destroy_domain(struct domain *d) SCHED_STAT_CRANK(dom_destroy); TRACE_1D(TRC_SCHED_DOM_REM, d->domain_id); + rcu_read_lock(&sched_res_rculock); + sched_free_domdata(dom_scheduler(d), d->sched_priv); d->sched_priv = NULL; + rcu_read_unlock(&sched_res_rculock); + cpupool_rm_domain(d); } } @@ -770,11 +804,15 @@ void vcpu_sleep_nosync(struct vcpu *v) TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id); + rcu_read_lock(&sched_res_rculock); + lock = unit_schedule_lock_irqsave(v->sched_unit, &flags); vcpu_sleep_nosync_locked(v); unit_schedule_unlock_irqrestore(lock, flags, v->sched_unit); + + rcu_read_unlock(&sched_res_rculock); } void vcpu_sleep_sync(struct vcpu *v) @@ -795,6 +833,8 @@ void vcpu_wake(struct vcpu *v) TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id); + rcu_read_lock(&sched_res_rculock); + lock = 
unit_schedule_lock_irqsave(unit, &flags); if ( likely(vcpu_runnable(v)) ) @@ -820,6 +860,8 @@ void vcpu_wake(struct vcpu *v) } unit_schedule_unlock_irqrestore(lock, flags, unit); + + rcu_read_unlock(&sched_res_rculock); } void vcpu_unblock(struct vcpu *v) @@ -853,6 +895,8 @@ static void sched_unit_move_locked(struct sched_unit *unit, unsigned int old_cpu = unit->res->master_cpu; struct vcpu *v; + rcu_read_lock(&sched_res_rculock); + /* * Transfer urgency status to new CPU before switching CPUs, as * once the switch occurs, v->is_urgent is no longer protected by @@ -872,6 +916,8 @@ static void sched_unit_move_locked(struct sched_unit *unit, * pointer can't change while the current lock is held. */ sched_migrate(unit_scheduler(unit), unit, new_cpu); + + rcu_read_unlock(&sched_res_rculock); } /* @@ -1039,6 +1085,8 @@ void restore_vcpu_affinity(struct domain *d) ASSERT(system_state == SYS_STATE_resume); + rcu_read_lock(&sched_res_rculock); + for_each_sched_unit ( d, unit ) { spinlock_t *lock; @@ -1095,6 +1143,8 @@ void restore_vcpu_affinity(struct domain *d) sched_move_irqs(unit); } + rcu_read_unlock(&sched_res_rculock); + domain_update_node_affinity(d); } @@ -1110,9 +1160,11 @@ int cpu_disable_scheduler(unsigned int cpu) cpumask_t online_affinity; int ret = 0; + rcu_read_lock(&sched_res_rculock); + c = get_sched_res(cpu)->cpupool; if ( c == NULL ) - return ret; + goto out; for_each_domain_in_cpupool ( d, c ) { @@ -1170,6 +1222,9 @@ int cpu_disable_scheduler(unsigned int cpu) } } +out: + rcu_read_unlock(&sched_res_rculock); + return ret; } @@ -1201,7 +1256,9 @@ static int cpu_disable_scheduler_check(unsigned int cpu) static void sched_set_affinity( struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft) { + rcu_read_lock(&sched_res_rculock); sched_adjust_affinity(dom_scheduler(unit->domain), unit, hard, soft); + rcu_read_unlock(&sched_res_rculock); if ( hard ) cpumask_copy(unit->cpu_hard_affinity, hard); @@ -1221,6 +1278,8 @@ static int 
vcpu_set_affinity( spinlock_t *lock; int ret = 0; + rcu_read_lock(&sched_res_rculock); + lock = unit_schedule_lock_irq(unit); if ( v->affinity_broken ) @@ -1249,6 +1308,8 @@ static int vcpu_set_affinity( sched_unit_migrate_finish(unit); + rcu_read_unlock(&sched_res_rculock); + return ret; } @@ -1375,11 +1436,16 @@ static long do_poll(struct sched_poll *sched_poll) long vcpu_yield(void) { struct vcpu * v=current; - spinlock_t *lock = unit_schedule_lock_irq(v->sched_unit); + spinlock_t *lock; + + rcu_read_lock(&sched_res_rculock); + lock = unit_schedule_lock_irq(v->sched_unit); sched_yield(vcpu_scheduler(v), v->sched_unit); unit_schedule_unlock_irq(lock, v->sched_unit); + rcu_read_unlock(&sched_res_rculock); + SCHED_STAT_CRANK(vcpu_yield); TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id); @@ -1476,6 +1542,8 @@ int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason) int ret = -EINVAL; bool migrate; + rcu_read_lock(&sched_res_rculock); + lock = unit_schedule_lock_irq(unit); if ( cpu == NR_CPUS ) @@ -1515,6 +1583,8 @@ int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason) if ( migrate ) sched_unit_migrate_finish(unit); + rcu_read_unlock(&sched_res_rculock); + return ret; } @@ -1726,9 +1796,13 @@ long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op) /* NB: the pluggable scheduler code needs to take care * of locking by itself. */ + rcu_read_lock(&sched_res_rculock); + if ( (ret = sched_adjust_dom(dom_scheduler(d), d, op)) == 0 ) TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id); + rcu_read_unlock(&sched_res_rculock); + return ret; } @@ -1749,9 +1823,13 @@ long sched_adjust_global(struct xen_sysctl_scheduler_op *op) if ( pool == NULL ) return -ESRCH; + rcu_read_lock(&sched_res_rculock); + rc = ((op->sched_id == pool->sched->sched_id) ? 
sched_adjust_cpupool(pool->sched, op) : -EINVAL); + rcu_read_unlock(&sched_res_rculock); + cpupool_put(pool); return rc; @@ -1971,7 +2049,11 @@ static void unit_context_saved(struct sched_resource *sr) void sched_context_switched(struct vcpu *vprev, struct vcpu *vnext) { struct sched_unit *next = vnext->sched_unit; - struct sched_resource *sr = get_sched_res(smp_processor_id()); + struct sched_resource *sr; + + rcu_read_lock(&sched_res_rculock); + + sr = get_sched_res(smp_processor_id()); if ( atomic_read(&next->rendezvous_out_cnt) ) { @@ -1998,6 +2080,8 @@ void sched_context_switched(struct vcpu *vprev, struct vcpu *vnext) if ( is_idle_vcpu(vprev) && vprev != vnext ) vprev->sched_unit = sr->sched_unit_idle; + + rcu_read_unlock(&sched_res_rculock); } static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext, @@ -2021,6 +2105,8 @@ static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext, vnext->sched_unit = get_sched_res(smp_processor_id())->sched_unit_idle; + rcu_read_unlock(&sched_res_rculock); + trace_continue_running(vnext); return continue_running(vprev); } @@ -2034,6 +2120,8 @@ static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext, vcpu_periodic_timer_work(vnext); + rcu_read_unlock(&sched_res_rculock); + context_switch(vprev, vnext); } @@ -2186,6 +2274,8 @@ static void sched_slave(void) ASSERT_NOT_IN_ATOMIC(); + rcu_read_lock(&sched_res_rculock); + lock = pcpu_schedule_lock_irq(cpu); now = NOW(); @@ -2209,6 +2299,8 @@ static void sched_slave(void) { pcpu_schedule_unlock_irq(lock, cpu); + rcu_read_unlock(&sched_res_rculock); + /* Check for failed forced context switch. 
*/ if ( do_softirq ) raise_softirq(SCHEDULE_SOFTIRQ); @@ -2241,13 +2333,16 @@ static void schedule(void) struct sched_resource *sr; spinlock_t *lock; int cpu = smp_processor_id(); - unsigned int gran = get_sched_res(cpu)->granularity; + unsigned int gran; ASSERT_NOT_IN_ATOMIC(); SCHED_STAT_CRANK(sched_run); + rcu_read_lock(&sched_res_rculock); + sr = get_sched_res(cpu); + gran = sr->granularity; lock = pcpu_schedule_lock_irq(cpu); @@ -2259,6 +2354,8 @@ static void schedule(void) */ pcpu_schedule_unlock_irq(lock, cpu); + rcu_read_unlock(&sched_res_rculock); + raise_softirq(SCHEDULE_SOFTIRQ); return sched_slave(); } @@ -2370,14 +2467,27 @@ static int cpu_schedule_up(unsigned int cpu) return 0; } +static void sched_res_free(struct rcu_head *head) +{ + struct sched_resource *sr = container_of(head, struct sched_resource, rcu); + + xfree(sr); +} + static void cpu_schedule_down(unsigned int cpu) { - struct sched_resource *sr = get_sched_res(cpu); + struct sched_resource *sr; + + rcu_read_lock(&sched_res_rculock); + + sr = get_sched_res(cpu); kill_timer(&sr->s_timer); set_sched_res(cpu, NULL); - xfree(sr); + call_rcu(&sr->rcu, sched_res_free); + + rcu_read_unlock(&sched_res_rculock); } void sched_rm_cpu(unsigned int cpu) @@ -2397,6 +2507,8 @@ static int cpu_schedule_callback( unsigned int cpu = (unsigned long)hcpu; int rc = 0; + rcu_read_lock(&sched_res_rculock); + /* * From the scheduler perspective, bringing up a pCPU requires * allocating and initializing the per-pCPU scheduler specific data, @@ -2443,6 +2555,8 @@ static int cpu_schedule_callback( break; } + rcu_read_unlock(&sched_res_rculock); + return !rc ? 
NOTIFY_DONE : notifier_from_errno(rc); } @@ -2532,8 +2646,13 @@ void __init scheduler_init(void) idle_domain->max_vcpus = nr_cpu_ids; if ( vcpu_create(idle_domain, 0) == NULL ) BUG(); + + rcu_read_lock(&sched_res_rculock); + get_sched_res(0)->curr = idle_vcpu[0]->sched_unit; get_sched_res(0)->sched_unit_idle = idle_vcpu[0]->sched_unit; + + rcu_read_unlock(&sched_res_rculock); } /* @@ -2546,9 +2665,14 @@ int schedule_cpu_add(unsigned int cpu, struct cpupool *c) struct vcpu *idle; void *ppriv, *vpriv; struct scheduler *new_ops = c->sched; - struct sched_resource *sr = get_sched_res(cpu); + struct sched_resource *sr; spinlock_t *old_lock, *new_lock; unsigned long flags; + int ret = 0; + + rcu_read_lock(&sched_res_rculock); + + sr = get_sched_res(cpu); ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus)); ASSERT(!cpumask_test_cpu(cpu, c->cpu_valid)); @@ -2568,13 +2692,18 @@ int schedule_cpu_add(unsigned int cpu, struct cpupool *c) idle = idle_vcpu[cpu]; ppriv = sched_alloc_pdata(new_ops, cpu); if ( IS_ERR(ppriv) ) - return PTR_ERR(ppriv); + { + ret = PTR_ERR(ppriv); + goto out; + } + vpriv = sched_alloc_udata(new_ops, idle->sched_unit, idle->domain->sched_priv); if ( vpriv == NULL ) { sched_free_pdata(new_ops, ppriv, cpu); - return -ENOMEM; + ret = -ENOMEM; + goto out; } /* @@ -2613,7 +2742,10 @@ int schedule_cpu_add(unsigned int cpu, struct cpupool *c) /* The cpu is added to a pool, trigger it to go pick up some work */ cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); - return 0; +out: + rcu_read_unlock(&sched_res_rculock); + + return ret; } /* @@ -2626,11 +2758,16 @@ int schedule_cpu_rm(unsigned int cpu) { struct vcpu *idle; void *ppriv_old, *vpriv_old; - struct sched_resource *sr = get_sched_res(cpu); - struct scheduler *old_ops = sr->scheduler; + struct sched_resource *sr; + struct scheduler *old_ops; spinlock_t *old_lock; unsigned long flags; + rcu_read_lock(&sched_res_rculock); + + sr = get_sched_res(cpu); + old_ops = sr->scheduler; + ASSERT(sr->cpupool != NULL); 
ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus)); ASSERT(!cpumask_test_cpu(cpu, sr->cpupool->cpu_valid)); @@ -2663,6 +2800,8 @@ int schedule_cpu_rm(unsigned int cpu) sr->granularity = 1; sr->cpupool = NULL; + rcu_read_unlock(&sched_res_rculock); + return 0; } @@ -2711,6 +2850,8 @@ void schedule_dump(struct cpupool *c) /* Locking, if necessary, must be handled withing each scheduler */ + rcu_read_lock(&sched_res_rculock); + if ( c != NULL ) { sched = c->sched; @@ -2730,6 +2871,8 @@ void schedule_dump(struct cpupool *c) for_each_cpu (i, cpus) sched_dump_cpu_state(sched, i); } + + rcu_read_unlock(&sched_res_rculock); } void sched_tick_suspend(void) @@ -2737,10 +2880,14 @@ void sched_tick_suspend(void) struct scheduler *sched; unsigned int cpu = smp_processor_id(); + rcu_read_lock(&sched_res_rculock); + sched = get_sched_res(cpu)->scheduler; sched_do_tick_suspend(sched, cpu); rcu_idle_enter(cpu); rcu_idle_timer_start(); + + rcu_read_unlock(&sched_res_rculock); } void sched_tick_resume(void) @@ -2748,10 +2895,14 @@ void sched_tick_resume(void) struct scheduler *sched; unsigned int cpu = smp_processor_id(); + rcu_read_lock(&sched_res_rculock); + rcu_idle_timer_stop(); rcu_idle_exit(cpu); sched = get_sched_res(cpu)->scheduler; sched_do_tick_resume(sched, cpu); + + rcu_read_unlock(&sched_res_rculock); } void wait(void) diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h index f8f0f484cb..3988985ee6 100644 --- a/xen/include/xen/sched-if.h +++ b/xen/include/xen/sched-if.h @@ -10,6 +10,7 @@ #include <xen/percpu.h> #include <xen/err.h> +#include <xen/rcupdate.h> /* A global pointer to the initial cpupool (POOL0). 
*/ extern struct cpupool *cpupool0; @@ -57,18 +58,20 @@ struct sched_resource { unsigned int master_cpu; unsigned int granularity; const cpumask_t *cpus; /* cpus covered by this struct */ + struct rcu_head rcu; }; DECLARE_PER_CPU(struct sched_resource *, sched_res); +extern rcu_read_lock_t sched_res_rculock; static inline struct sched_resource *get_sched_res(unsigned int cpu) { - return per_cpu(sched_res, cpu); + return rcu_dereference(per_cpu(sched_res, cpu)); } static inline void set_sched_res(unsigned int cpu, struct sched_resource *res) { - per_cpu(sched_res, cpu) = res; + rcu_assign_pointer(per_cpu(sched_res, cpu), res); } static inline struct sched_unit *curr_on_cpu(unsigned int cpu) -- 2.16.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |