/****************************************************************************
 * (C) 2013 - Manohar Vanga - MPI-SWS
 ****************************************************************************
 *
 *        File: common/sched_xfair.c
 *      Author: Manohar Vanga
 *
 * Description: Table-driven scheduler for Xen
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
#include <xen/errno.h>
#include <xen/keyhandler.h>
#include <xen/trace.h>
#include <xen/list.h>
#include <xen/xmalloc.h>
#include <asm/atomic.h>

/* Default timeslice: 30ms */
#define XFAIR_DEFAULT_TSLICE_MS 30

/*
 * Some useful macros
 */

/* Get the private data from a set of ops */
#define XFAIR_PRIV(_ops) \
    ((struct xfair_private *)((_ops)->sched_data))
/* Get the PCPU structure for a given CPU number */
#define XFAIR_PCPU(_c) \
    ((struct xfair_pcpu *)per_cpu(schedule_data, _c).sched_priv)
/* Get the XFair VCPU structure for a given Xen VCPU */
#define XFAIR_VCPU(_vcpu)   ((struct xfair_vcpu *) (_vcpu)->sched_priv)
/* Get the XFair dom structure for a given Xen dom */
#define XFAIR_DOM(_dom)     ((struct xfair_dom *) (_dom)->sched_priv)
/* Get the runqueue for a particular CPU */
#define RUNQ(_cpu)          (&(XFAIR_PCPU(_cpu)->runq))
/* Is the first element of _cpu's runq its idle vcpu? */
#define IS_RUNQ_IDLE(_cpu)  (list_empty(RUNQ(_cpu)) || \
                             is_idle_vcpu(__runq_elem(RUNQ(_cpu)->next)->vcpu))

/* XFair tracing events */
#define TRC_XFAIR_SCHED_START   TRC_SCHED_CLASS_EVT(XFAIR, 1)
#define TRC_XFAIR_SCHED_END     TRC_SCHED_CLASS_EVT(XFAIR, 2)

/* Physical CPU */
struct xfair_pcpu {
    struct list_head runq;
#if 0
    struct timer ticker;
    unsigned int tick;
#endif
};

/* Virtual CPU */
struct xfair_vcpu {
    struct xfair_dom *domain;   /* The domain this VCPU belongs to */
    struct vcpu *vcpu;          /* The core Xen VCPU structure */
    struct list_head runq_elem; /* List element for adding to runqueue */
};

/* Domain */
struct xfair_dom {
    struct domain *dom;         /* The core Xen domain structure */
};

/* System-wide private data */
struct xfair_private {
    spinlock_t lock;
};

static inline int
__vcpu_on_runq(struct xfair_vcpu *vcpu)
{
    return !list_empty(&vcpu->runq_elem);
}

static inline struct xfair_vcpu *
__runq_elem(struct list_head *elem)
{
    return list_entry(elem, struct xfair_vcpu, runq_elem);
}

static inline void
__runq_insert(unsigned int cpu, struct xfair_vcpu *vcpu)
{
    struct list_head *runq = RUNQ(cpu);

    BUG_ON(__vcpu_on_runq(vcpu));
    BUG_ON(cpu != vcpu->vcpu->processor);

    /* Add back at the end of the list */
    list_add_tail(&vcpu->runq_elem, runq);
}

static inline void
__runq_remove(struct xfair_vcpu *vcpu)
{
    BUG_ON(!__vcpu_on_runq(vcpu));
    list_del_init(&vcpu->runq_elem);
}

static inline void
print_runq(unsigned int cpu)
{
    struct xfair_vcpu *c;
    struct list_head *runq = RUNQ(cpu);

    debug("RUNQ: ");
    list_for_each_entry(c, runq, runq_elem)
        debug("(%d.%d) ", c->domain->dom->domain_id, c->vcpu->vcpu_id);
    debug("\n");
}

/* Allocate a structure for a physical CPU */
static void *
xfair_alloc_pdata(const struct scheduler *ops, int cpu)
{
    struct xfair_pcpu *pcpu;

    debug(KERN_INFO "%s: ", __func__);
    debug("cpu=%d\n", cpu);

    /* Allocate per-PCPU info */
    pcpu = xzalloc(struct xfair_pcpu);
    if (pcpu == NULL)
        return NULL;

    INIT_LIST_HEAD(&pcpu->runq);

    /* schedule.c expects sched_priv to be non-NULL, so set it here
     * if it has not been set already */
    if (per_cpu(schedule_data, cpu).sched_priv == NULL)
        per_cpu(schedule_data, cpu).sched_priv = pcpu;

    BUG_ON(!is_idle_vcpu(curr_on_cpu(cpu)));

    return pcpu;
}

static void
xfair_free_pdata(const struct scheduler *ops, void *pc, int cpu)
{
    struct xfair_pcpu *pcpu = pc;

    debug(KERN_INFO "%s: ", __func__);
    debug("cpu=%d\n", cpu);

    if (pcpu)
        xfree(pcpu);
}
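/*
 * A minimal debugging sketch (not part of the original scheduler): walk a
 * CPU's runqueue and assert the invariants the helpers above rely on --
 * every queued element reports itself as queued, and belongs to this CPU
 * (VCPUs never migrate here; see xfair_pick_cpu below). The name
 * __runq_check is hypothetical; if used at all, it should only be called
 * from debug builds, since it is O(n) in runqueue length.
 */
#ifdef RTS_CONFIG_DEBUG
static inline void
__runq_check(unsigned int cpu)
{
    struct xfair_vcpu *c;
    struct list_head *runq = RUNQ(cpu);

    list_for_each_entry(c, runq, runq_elem) {
        /* A linked element must report itself as being on a runqueue */
        BUG_ON(!__vcpu_on_runq(c));
        /* This scheduler never queues a VCPU on a foreign CPU */
        BUG_ON(c->vcpu->processor != cpu);
    }
}
#endif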
static void *
xfair_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
{
    struct xfair_vcpu *vcpu;

    /* Allocate per-VCPU info */
    vcpu = xzalloc(struct xfair_vcpu);
    if (vcpu == NULL)
        return NULL;

    INIT_LIST_HEAD(&vcpu->runq_elem);
    vcpu->domain = dd;
    vcpu->vcpu = vc;

    debug(KERN_INFO "%s: ", __func__);
    debug("vcpu=%d\n", vc->vcpu_id);

    return vcpu;
}

static void
xfair_free_vdata(const struct scheduler *ops, void *vc)
{
    struct xfair_vcpu *vcpu = vc;

    if (!vcpu)
        return;

    debug(KERN_INFO "%s: ", __func__);
    debug("vcpu=%d\n", vcpu->vcpu->vcpu_id);

    BUG_ON(!list_empty(&vcpu->runq_elem));
    xfree(vcpu);
}

static void
xfair_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
{
    struct xfair_vcpu *vcpu = vc->sched_priv;

    BUG_ON(!vcpu);

    debug(KERN_INFO "%s: ", __func__);
    debug("vcpu=%d\n", vcpu->vcpu->vcpu_id);

    if (!vc->is_running && vcpu_runnable(vc) && !__vcpu_on_runq(vcpu))
        __runq_insert(vc->processor, vcpu);
}

static void
xfair_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
{
    struct xfair_vcpu * const vcpu = XFAIR_VCPU(vc);
    struct xfair_dom *dom;

    /* Check the VCPU pointer before dereferencing it */
    BUG_ON(!vcpu);
    dom = vcpu->domain;
    BUG_ON(dom == NULL);

    debug(KERN_INFO "%s: ", __func__);
    debug("vcpu=%d\n", vcpu->vcpu->vcpu_id);

    if (__vcpu_on_runq(vcpu))
        __runq_remove(vcpu);

    BUG_ON(!list_empty(&vcpu->runq_elem));
}

static void
xfair_sleep(const struct scheduler *ops, struct vcpu *vc)
{
    struct xfair_vcpu * const vcpu = XFAIR_VCPU(vc);

    debug(KERN_INFO "%s: ", __func__);
    debug("dom=%d, vcpu=%d\n", vcpu->domain->dom->domain_id,
          vcpu->vcpu->vcpu_id);

    BUG_ON(is_idle_vcpu(vc));
    BUG_ON(vcpu_runnable(vc));

    /*
     * If the VCPU is the one currently running on this processor, it is
     * guaranteed not to be on that processor's runqueue, so just trigger
     * a reschedule. Otherwise, if it was waiting on the runqueue, it must
     * be removed from it.
     */
    if (curr_on_cpu(vc->processor) == vc)
        cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
    else if (__vcpu_on_runq(vcpu))
        __runq_remove(vcpu);
}

static void
xfair_wake(const struct scheduler *ops, struct vcpu *vc)
{
    struct xfair_vcpu * const vcpu = XFAIR_VCPU(vc);

    debug(KERN_INFO "%s: ", __func__);
    debug("dom=%d, vcpu=%d\n", vcpu->domain->dom->domain_id,
          vcpu->vcpu->vcpu_id);

    BUG_ON(is_idle_vcpu(vc));

    if (unlikely(curr_on_cpu(vc->processor) == vc)) {
        debug("woke vcpu=%d that is currently running on cpu=%d\n",
              vc->vcpu_id, vc->processor);
        return;
    }

    if (unlikely(__vcpu_on_runq(vcpu))) {
        debug("vcpu=%d is already on runqueue of cpu=%d\n",
              vc->vcpu_id, vc->processor);
        return;
    }

    /* The VCPU is known not to be on the runqueue at this point */
    if (vcpu_runnable(vc) && !vc->is_running)
        __runq_insert(vc->processor, vcpu);

    cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
}

static void
xfair_yield(const struct scheduler *ops, struct vcpu *vc)
{
#ifdef RTS_CONFIG_DEBUG
    struct xfair_vcpu * const vcpu = XFAIR_VCPU(vc);
#endif

    debug(KERN_INFO "%s: ", __func__);
    debug("dom=%d, vcpu=%d\n", vcpu->domain->dom->domain_id,
          vcpu->vcpu->vcpu_id);
}

static void *
xfair_alloc_domdata(const struct scheduler *ops, struct domain *d)
{
    struct xfair_dom *dom;

    debug(KERN_INFO "%s: ", __func__);
    debug("dom=%d\n", d->domain_id);

    dom = xzalloc(struct xfair_dom);
    if (dom == NULL)
        return NULL;

    dom->dom = d;

    return (void *)dom;
}

static void
xfair_free_domdata(const struct scheduler *ops, void *d)
{
#ifdef RTS_CONFIG_DEBUG
    struct xfair_dom *dom = d;
#endif

    debug(KERN_INFO "%s: ", __func__);
    debug("dom=%d\n", dom->dom->domain_id);

    xfree(d);
}
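/*
 * A hypothetical debugging helper (not in the original file) showing how
 * the per-domain and per-VCPU wrappers above hang together: given a Xen
 * domain, recover the xfair_dom via XFAIR_DOM(), then walk its VCPUs with
 * the standard for_each_vcpu() iterator and report whether each one is
 * currently queued. The function name is an assumption for illustration.
 */
#ifdef RTS_CONFIG_DEBUG
static void
xfair_dump_domain(struct domain *d)
{
    struct vcpu *v;

    /* The idle domain is never managed by this scheduler */
    if (is_idle_domain(d) || XFAIR_DOM(d) == NULL)
        return;

    debug("dom=%d:", d->domain_id);
    for_each_vcpu(d, v)
        debug(" vcpu=%d%s", v->vcpu_id,
              __vcpu_on_runq(XFAIR_VCPU(v)) ? " (queued)" : "");
    debug("\n");
}
#endif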
static int
xfair_dom_init(const struct scheduler *ops, struct domain *d)
{
    struct xfair_dom *dom;

    if (is_idle_domain(d))
        return 0;

    dom = xfair_alloc_domdata(ops, d);
    if (dom == NULL)
        return -ENOMEM;

    d->sched_priv = dom;

    debug(KERN_INFO "%s: ", __func__);
    debug("dom=%d\n", d->domain_id);

    return 0;
}

static void
xfair_dom_destroy(const struct scheduler *ops, struct domain *d)
{
    debug(KERN_INFO "%s: ", __func__);
    debug("dom=%d\n", d->domain_id);

    xfair_free_domdata(ops, XFAIR_DOM(d));
}

static int
xfair_pick_cpu(const struct scheduler *ops, struct vcpu *v)
{
    debug(KERN_INFO "%s: ", __func__);
    debug("vcpu=%d, pcpu picked=%d\n", v->vcpu_id, v->processor);

    /* No migration: a VCPU always stays on its current processor */
    return v->processor;
}

/*
 * This function is in the critical path. It is designed to be simple and
 * fast for the common case.
 */
static struct task_slice
xfair_schedule(const struct scheduler *ops, s_time_t now,
               bool_t tasklet_work_scheduled)
{
    const int cpu = smp_processor_id();
    struct list_head * const runq = RUNQ(cpu);
    struct xfair_vcpu * const scurr = XFAIR_VCPU(current);
    struct xfair_vcpu *snext;
    struct task_slice ret;
    s_time_t tslice = MILLISECS(XFAIR_DEFAULT_TSLICE_MS);

    /* Add the currently running VCPU back to the end of the runqueue */
    if (!__vcpu_on_runq(scurr) && vcpu_runnable(current) &&
        !is_idle_vcpu(current))
        __runq_insert(cpu, scurr);

    print_runq(cpu);

    /* Tasklet work (which runs in idle VCPU context) overrides all else. */
    if (tasklet_work_scheduled) {
        debug(KERN_INFO "%s: ", __func__);
        debug("tasklet work scheduled. idling.\n");
        snext = XFAIR_VCPU(idle_vcpu[cpu]);
    } else {
        /* Select the next runnable local VCPU (i.e. head of local runq) */
        if (!list_empty(runq)) {
            snext = __runq_elem(runq->next);
            if (__vcpu_on_runq(snext))
                __runq_remove(snext);
        } else {
            snext = XFAIR_VCPU(idle_vcpu[cpu]);
        }
    }

    print_runq(cpu);

    /* Initialize, check and return the task to run next */
    ret.task = snext->vcpu;
    ret.time = (is_idle_vcpu(snext->vcpu) ? -1 : tslice);
    ret.migrated = 0;

    /* snext is always set by one of the branches above */
    if (snext->vcpu != current) {
        debug(KERN_INFO "%s: ", __func__);
        if (!is_idle_vcpu(snext->vcpu))
            debug("CPU %d picked (dom.vcpu)=%d.%d\n", cpu,
                  snext->domain->dom->domain_id, snext->vcpu->vcpu_id);
        else
            debug("CPU %d picked (dom.vcpu)=idle.%d\n", cpu,
                  snext->vcpu->vcpu_id);
    }

    return ret;
}

static int
xfair_init(struct scheduler *ops)
{
    struct xfair_private *priv;

    priv = xzalloc(struct xfair_private);
    if (priv == NULL)
        return -ENOMEM;

    ops->sched_data = priv;
    spin_lock_init(&priv->lock);

    debugtrace_toggle();

    return 0;
}

static void
xfair_deinit(const struct scheduler *ops)
{
    struct xfair_private *priv;

    priv = XFAIR_PRIV(ops);
    if (priv)
        xfree(priv);
}

static struct xfair_private _xfair_priv;

const struct scheduler sched_xfair_def = {
    .name           = "XFair Table-Driven Scheduler",
    .opt_name       = "xfair",
    .sched_id       = XEN_SCHEDULER_XFAIR,
    .sched_data     = &_xfair_priv,

    .init_domain    = xfair_dom_init,
    .destroy_domain = xfair_dom_destroy,

    .insert_vcpu    = xfair_vcpu_insert,
    .remove_vcpu    = xfair_vcpu_remove,

    .sleep          = xfair_sleep,
    .wake           = xfair_wake,
    .yield          = xfair_yield,

    .pick_cpu       = xfair_pick_cpu,
    .do_schedule    = xfair_schedule,

    .init           = xfair_init,
    .deinit         = xfair_deinit,

    .alloc_vdata    = xfair_alloc_vdata,
    .free_vdata     = xfair_free_vdata,
    .alloc_pdata    = xfair_alloc_pdata,
    .free_pdata     = xfair_free_pdata,
    .alloc_domdata  = xfair_alloc_domdata,
    .free_domdata   = xfair_free_domdata,
};
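/*
 * A sketch of how this scheduler would be hooked into Xen, assuming
 * XEN_SCHEDULER_XFAIR has been added alongside the other scheduler IDs in
 * the public headers: the definition above must also be listed in the
 * static schedulers[] array in common/schedule.c, after which the
 * hypervisor can be booted with "sched=xfair" (matching .opt_name above)
 * to select it:
 *
 *     static const struct scheduler *schedulers[] = {
 *         ...
 *         &sched_credit_def,
 *         &sched_xfair_def,
 *     };
 */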