
[Xen-devel] [RFC PATCH 1/4] Implement the CBS algorithm; remove extra queues, latency scaling, and weight support from sedf
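
Replace sedf's extratime machinery (the penalty and utilization extra
queues and their score bookkeeping), the latency-scaling code and the
weight-based parameter adjustment with a Constant Bandwidth Server
(CBS) scheme layered on top of plain EDF.

Each vcpu now carries only a period and a slice (budget); the bandwidth
it is granted is slice/period.  At vcpu creation, domain 0 receives the
default 10ms slice in a 20ms period so that it can boot, while all
other domains start with an empty slice until parameters are set.

Vcpus are "hard" by default and keep the previous EDF behaviour: they
run within their slice and then wait for the start of their next
period, with deadline misses and overruns accounted under SEDF_STATS.
Vcpus marked with the new SEDF_SOFT_TASK flag are treated as CBS
servers instead: on wake-up the CBS rule decides whether the current
budget/deadline pair can be kept or the deadline must be pushed one
period ahead with a fresh slice, and the vcpu goes straight back onto
the run queue instead of waiting for its next period.  When a soft vcpu
exhausts its slice, its deadline is postponed and the slice is
replenished, so it can never consume more than its reserved bandwidth.

The parameter interface is reduced accordingly: sedf_adjust() now takes
a period, a slice and a "soft" flag, and no longer uses the extratime,
weight and latency parameters.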



---
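
For reference, here is a minimal, self-contained sketch of the CBS
wake-up rule as applied in sedf_wake() below.  It is illustrative only,
not the in-tree code, and the struct and function names are made up.
With c the remaining budget, d the current absolute deadline, r the
wake-up time and U = Q/T the server bandwidth, the rule fires when
c >= (d - r) * U; the test is written cross-multiplied,
c * T >= (d - r) * Q, so the bandwidth never has to be computed with an
integer division.  For example, with Q = 10ms and T = 20ms, a vcpu that
wakes 5ms before its deadline with 8ms of budget left fires the rule
(8ms >= 5ms * 0.5), so its deadline moves one period ahead and its
budget is refilled.

    #include <stdint.h>

    typedef int64_t s_time_t;       /* time in nanoseconds, as in Xen */

    struct cbs_server {             /* illustrative stand-in for sedf_vcpu_info */
        s_time_t period;            /* T: server period               */
        s_time_t slice;             /* Q: budget available per period */
        s_time_t deadl_abs;         /* d: current absolute deadline   */
        s_time_t cputime;           /* budget consumed in this period */
    };

    static void cbs_wake(struct cbs_server *s, s_time_t now)
    {
        s_time_t c = s->slice - s->cputime;    /* remaining budget */

        /* CBS rule: c >= (d - r) * Q/T, cross-multiplied by T. */
        if ( c * s->period >= (s->deadl_abs - now) * s->slice )
        {
            /*
             * The current budget/deadline pair is no longer feasible:
             * postpone the deadline by one period and replenish the budget.
             */
            s->deadl_abs += s->period;
            s->cputime    = 0;
        }
        /* Otherwise keep the current budget and deadline. */
    }

The overrun handling in desched_edf_dom() works out as follows for the
same Q = 10ms, T = 20ms server: if a vcpu is descheduled after having
run for 35ms, the completed slice pushes the deadline by one period
(20ms); the remaining 25ms of overrun counts as two whole extra slices,
which adds a doubled penalty of 2 * 20ms * 2 = 80ms to the deadline,
and the leftover 5ms is carried into the next period as
already-consumed budget.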
 xen/common/sched_sedf.c |  947 +++++++++--------------------------------------
 1 file changed, 173 insertions(+), 774 deletions(-)

diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
index 0c9011a..2ee4538 100644
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -25,24 +25,16 @@
 #define CHECK(_p) ((void)0)
 #endif
 
-#define EXTRA_NONE (0)
-#define EXTRA_AWARE (1)
-#define EXTRA_RUN_PEN (2)
-#define EXTRA_RUN_UTIL (4)
-#define EXTRA_WANT_PEN_Q (8)
-#define EXTRA_PEN_Q (0)
-#define EXTRA_UTIL_Q (1)
+#define SEDF_SOFT_TASK (1)
 #define SEDF_ASLEEP (16)
 
-#define EXTRA_QUANTUM (MICROSECS(500)) 
-#define WEIGHT_PERIOD (MILLISECS(100))
-#define WEIGHT_SAFETY (MILLISECS(5))
+#define DEFAULT_PERIOD (MILLISECS(20))
+#define DEFAULT_SLICE (MILLISECS(10))
 
 #define PERIOD_MAX MILLISECS(10000) /* 10s  */
 #define PERIOD_MIN (MICROSECS(10))  /* 10us */
 #define SLICE_MIN (MICROSECS(5))    /*  5us */
 
-#define IMPLY(a, b) (!(a) || (b))
 #define EQ(a, b) ((!!(a)) == (!!(b)))
 
 
@@ -58,24 +50,14 @@ struct sedf_priv_info {
 struct sedf_vcpu_info {
     struct vcpu *vcpu;
     struct list_head list;
-    struct list_head extralist[2];
  
     /* Parameters for EDF */
     s_time_t  period;  /* = relative deadline */
     s_time_t  slice;   /* = worst case execution time */
- 
-    /* Advaced Parameters */
+    /* Note: Server bandwidth = (slice / period) */
 
-    /* Latency Scaling */
-    s_time_t  period_orig;
-    s_time_t  slice_orig;
-    s_time_t  latency;
- 
     /* Status of domain */
     int       status;
-    /* Weights for "Scheduling for beginners/ lazy/ etc." ;) */
-    short     weight;
-    short     extraweight;
     /* Bookkeeping */
     s_time_t  deadl_abs;
     s_time_t  sched_start_abs;
@@ -84,28 +66,21 @@ struct sedf_vcpu_info {
     s_time_t  block_abs;
     s_time_t  unblock_abs;
  
-    /* Scores for {util, block penalty}-weighted extratime distribution */
-    int   score[2];
-    s_time_t  short_block_lost_tot;
- 
-    /* Statistics */
-    s_time_t  extra_time_tot;
-
 #ifdef SEDF_STATS
     s_time_t  block_time_tot;
-    s_time_t  penalty_time_tot;
     int   block_tot;
     int   short_block_tot;
     int   long_block_tot;
-    int   pen_extra_blocks;
-    int   pen_extra_slices;
+    s_time_t  miss_time;
+    s_time_t  over_time;
+    int   miss_tot;
+    int   over_tot;
 #endif
 };
 
 struct sedf_cpu_info {
     struct list_head runnableq;
     struct list_head waitq;
-    struct list_head extraq[2];
     s_time_t         current_slice_expires;
 };
 
@@ -115,102 +90,20 @@ struct sedf_cpu_info {
 #define CPU_INFO(cpu)  \
     ((struct sedf_cpu_info *)per_cpu(schedule_data, cpu).sched_priv)
 #define LIST(d)        (&EDOM_INFO(d)->list)
-#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i]))
 #define RUNQ(cpu)      (&CPU_INFO(cpu)->runnableq)
 #define WAITQ(cpu)     (&CPU_INFO(cpu)->waitq)
-#define EXTRAQ(cpu,i)  (&(CPU_INFO(cpu)->extraq[i]))
 #define IDLETASK(cpu)  (idle_vcpu[cpu])
 
 #define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
 
 #define DIV_UP(x,y) (((x) + (y) - 1) / y)
 
-#define extra_runs(inf)      ((inf->status) & 6)
-#define extra_get_cur_q(inf) (((inf->status & 6) >> 1)-1)
 #define sedf_runnable(edom)  (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
 
+#define sedf_soft(edom)  (EDOM_INFO(edom)->status & SEDF_SOFT_TASK)
 
-static void sedf_dump_cpu_state(const struct scheduler *ops, int i);
-
-static inline int extraq_on(struct vcpu *d, int i)
-{
-    return ((EXTRALIST(d,i)->next != NULL) &&
-            (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
-}
-
-static inline void extraq_del(struct vcpu *d, int i)
-{
-    struct list_head *list = EXTRALIST(d,i);
-    ASSERT(extraq_on(d,i));
-    list_del(list);
-    list->next = NULL;
-    ASSERT(!extraq_on(d, i));
-}
-
-/*
- * Adds a domain to the queue of processes which are aware of extra time. List
- * is sorted by score, where a lower score means higher priority for an extra
- * slice. It also updates the score, by simply subtracting a fixed value from
- * each entry, in order to avoid overflow. The algorithm works by simply
- * charging each domain that recieved extratime with an inverse of its weight.
- */ 
-static inline void extraq_add_sort_update(struct vcpu *d, int i, int sub)
-{
-    struct list_head      *cur;
-    struct sedf_vcpu_info *curinf;
- 
-    ASSERT(!extraq_on(d,i));
-
-    /*
-     * Iterate through all elements to find our "hole" and on our way
-     * update all the other scores.
-     */
-    list_for_each ( cur, EXTRAQ(d->processor, i) )
-    {
-        curinf = list_entry(cur,struct sedf_vcpu_info,extralist[i]);
-        curinf->score[i] -= sub;
-        if ( EDOM_INFO(d)->score[i] < curinf->score[i] )
-            break;
-    }
-
-    /* cur now contains the element, before which we'll enqueue */
-    list_add(EXTRALIST(d,i),cur->prev);
- 
-    /* Continue updating the extraq */
-    if ( (cur != EXTRAQ(d->processor,i)) && sub )
-    {
-        for ( cur = cur->next; cur != EXTRAQ(d->processor,i); cur = cur->next )
-        {
-            curinf = list_entry(cur,struct sedf_vcpu_info, extralist[i]);
-            curinf->score[i] -= sub;
-        }
-    }
-
-    ASSERT(extraq_on(d,i));
-}
-static inline void extraq_check(struct vcpu *d)
-{
-    if ( extraq_on(d, EXTRA_UTIL_Q) )
-    {
-        if ( !(EDOM_INFO(d)->status & EXTRA_AWARE) &&
-             !extra_runs(EDOM_INFO(d)) )
-            extraq_del(d, EXTRA_UTIL_Q);
-    }
-    else
-    {
-        if ( (EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d) )
-            extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
-    }
-}
-
-static inline void extraq_check_add_unblocked(struct vcpu *d, int priority)
-{
-    struct sedf_vcpu_info *inf = EDOM_INFO(d);
 
-    if ( inf->status & EXTRA_AWARE )
-        /* Put on the weighted extraq without updating any scores */
-        extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
-}
+static void sedf_dump_cpu_state(const struct scheduler *ops, int i);
 
 static inline int __task_on_queue(struct vcpu *d)
 {
@@ -284,11 +177,7 @@ static inline void __add_to_runqueue_sort(struct vcpu *v)
 
 static void sedf_insert_vcpu(const struct scheduler *ops, struct vcpu *v)
 {
-    if ( !is_idle_vcpu(v) )
-    {
-        extraq_check(v);
-    }
-    else
+    if ( is_idle_vcpu(v) )
     {
         EDOM_INFO(v)->deadl_abs = 0;
         EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
@@ -305,19 +194,23 @@ static void *sedf_alloc_vdata(const struct scheduler *ops, struct vcpu *v, void
 
     inf->vcpu = v;
 
-    /* Every VCPU gets an equal share of extratime by default */
-    inf->deadl_abs   = 0;
-    inf->latency     = 0;
-    inf->status      = EXTRA_AWARE | SEDF_ASLEEP;
-    inf->extraweight = 1;
-    /* Upon creation all domain are best-effort */
-    inf->period      = WEIGHT_PERIOD;
-    inf->slice       = 0;
+    inf->deadl_abs  = 0;
+    inf->cputime    = 0;
+    inf->status     = SEDF_ASLEEP;
+
+    if (v->domain->domain_id == 0)
+    {
+        /* Domain 0 needs a slice to boot the machine */
+        inf->period      = DEFAULT_PERIOD;
+        inf->slice       = DEFAULT_SLICE;
+    }
+    else
+    {
+        inf->period      = DEFAULT_PERIOD;
+        inf->slice       = 0;
+    }
 
-    inf->period_orig = inf->period; inf->slice_orig = inf->slice;
     INIT_LIST_HEAD(&(inf->list));
-    INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
-    INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
 
     SCHED_STAT_CRANK(vcpu_init);
 
@@ -333,8 +226,6 @@ sedf_alloc_pdata(const struct scheduler *ops, int cpu)
     BUG_ON(spc == NULL);
     INIT_LIST_HEAD(&spc->waitq);
     INIT_LIST_HEAD(&spc->runnableq);
-    INIT_LIST_HEAD(&spc->extraq[EXTRA_PEN_Q]);
-    INIT_LIST_HEAD(&spc->extraq[EXTRA_UTIL_Q]);
 
     return (void *)spc;
 }
@@ -410,49 +301,59 @@ static void desched_edf_dom(s_time_t now, struct vcpu* d)
   
     __del_from_queue(d);
 
-    /*
-     * Manage bookkeeping (i.e. calculate next deadline, memorise
-     * overrun-time of slice) of finished domains.
-     */
+#ifdef SEDF_STATS
+    /* Manage deadline misses */
+    if ( unlikely(inf->deadl_abs < now) )
+    {
+        inf->miss_tot++;
+        inf->miss_time += inf->cputime;
+    }
+#endif
+
+    /* Manage overruns */
     if ( inf->cputime >= inf->slice )
     {
         inf->cputime -= inf->slice;
-  
-        if ( inf->period < inf->period_orig )
-        {
-            /* This domain runs in latency scaling or burst mode */
-            inf->period *= 2;
-            inf->slice  *= 2;
-            if ( (inf->period > inf->period_orig) ||
-                 (inf->slice > inf->slice_orig) )
-            {
-                /* Reset slice and period */
-                inf->period = inf->period_orig;
-                inf->slice = inf->slice_orig;
-            }
-        }
 
         /* Set next deadline */
         inf->deadl_abs += inf->period;
+
+        /* Ensure that the cputime is always less than slice */
+        if ( unlikely(inf->cputime > inf->slice) )
+        {
+#ifdef SEDF_STATS
+            inf->over_tot++;
+            inf->over_time += inf->cputime;
+#endif
+
+            /* Make up for the overage by pushing the deadline
+               into the future */
+            inf->deadl_abs += ((inf->cputime / inf->slice)
+                               * inf->period) * 2;
+            inf->cputime -= (inf->cputime / inf->slice) * inf->slice;
+        }
+
+        /* Ensure that the start of the next period is in the future */
+        if ( unlikely(PERIOD_BEGIN(inf) < now) )
+            inf->deadl_abs += 
+                (DIV_UP(now - PERIOD_BEGIN(inf),
+                        inf->period)) * inf->period;
     }
  
     /* Add a runnable domain to the waitqueue */
     if ( sedf_runnable(d) )
     {
-        __add_to_waitqueue_sort(d);
-    }
-    else
-    {
-        /* We have a blocked realtime task -> remove it from exqs too */
-        if ( extraq_on(d, EXTRA_PEN_Q) )
-            extraq_del(d, EXTRA_PEN_Q);
-        if ( extraq_on(d, EXTRA_UTIL_Q) )
-            extraq_del(d, EXTRA_UTIL_Q);
+        if( sedf_soft(d) )
+        {
+            __add_to_runqueue_sort(d);
+        }
+        else 
+        {
+            __add_to_waitqueue_sort(d);
+        }
     }
-
+    
     ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
-    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
-                 sedf_runnable(d)));
 }
 
 
@@ -498,217 +399,12 @@ static void update_queues(
             /* Put them back into the queue */
             __add_to_waitqueue_sort(curinf->vcpu);
         }
-        else if ( unlikely((curinf->deadl_abs < now) ||
-                           (curinf->cputime > curinf->slice)) )
-        {
-            /*
-             * We missed the deadline or the slice was already finished.
-             * Might hapen because of dom_adj.
-             */
-            printk("\tDomain %i.%i exceeded it's deadline/"
-                   "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
-                   " cputime: %"PRIu64"\n",
-                   curinf->vcpu->domain->domain_id,
-                   curinf->vcpu->vcpu_id,
-                   curinf->deadl_abs, curinf->slice, now,
-                   curinf->cputime);
-            __del_from_queue(curinf->vcpu);
-
-            /* Common case: we miss one period */
-            curinf->deadl_abs += curinf->period;
-
-            /*
-             * If we are still behind: modulo arithmetic, force deadline
-             * to be in future and aligned to period borders.
-             */
-            if ( unlikely(curinf->deadl_abs < now) )
-                curinf->deadl_abs += 
-                    DIV_UP(now - curinf->deadl_abs,
-                           curinf->period) * curinf->period;
-            ASSERT(curinf->deadl_abs >= now);
-
-            /* Give a fresh slice */
-            curinf->cputime = 0;
-            if ( PERIOD_BEGIN(curinf) > now )
-                __add_to_waitqueue_sort(curinf->vcpu);
-            else
-                __add_to_runqueue_sort(curinf->vcpu);
-        }
         else
             break;
     }
 }
 
 
-/*
- * removes a domain from the head of the according extraQ and
- * requeues it at a specified position:
- *   round-robin extratime: end of extraQ
- *   weighted ext.: insert in sorted list by score
- * if the domain is blocked / has regained its short-block-loss
- * time it is not put on any queue.
- */
-static void desched_extra_dom(s_time_t now, struct vcpu *d)
-{
-    struct sedf_vcpu_info *inf = EDOM_INFO(d);
-    int i = extra_get_cur_q(inf);
-    unsigned long oldscore;
-
-    ASSERT(extraq_on(d, i));
-
-    /* Unset all running flags */
-    inf->status  &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
-    /* Fresh slice for the next run */
-    inf->cputime = 0;
-    /* Accumulate total extratime */
-    inf->extra_time_tot += now - inf->sched_start_abs;
-    /* Remove extradomain from head of the queue. */
-    extraq_del(d, i);
-
-    /* Update the score */
-    oldscore = inf->score[i];
-    if ( i == EXTRA_PEN_Q )
-    {
-        /* Domain was running in L0 extraq */
-        /* reduce block lost, probably more sophistication here!*/
-        /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
-        inf->short_block_lost_tot -= now - inf->sched_start_abs;
-#if 0
-        /* KAF: If we don't exit short-blocking state at this point
-         * domain0 can steal all CPU for up to 10 seconds before
-         * scheduling settles down (when competing against another
-         * CPU-bound domain). Doing this seems to make things behave
-         * nicely. Noone gets starved by default.
-         */
-        if ( inf->short_block_lost_tot <= 0 )
-#endif
-        {
-            /* We have (over-)compensated our block penalty */
-            inf->short_block_lost_tot = 0;
-            /* We don't want a place on the penalty queue anymore! */
-            inf->status &= ~EXTRA_WANT_PEN_Q;
-            goto check_extra_queues;
-        }
-
-        /*
-         * We have to go again for another try in the block-extraq,
-         * the score is not used incremantally here, as this is
-         * already done by recalculating the block_lost
-         */
-        inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
-            inf->short_block_lost_tot;
-        oldscore = 0;
-    }
-    else
-    {
-        /*
-         * Domain was running in L1 extraq => score is inverse of
-         * utilization and is used somewhat incremental!
-         */
-        if ( !inf->extraweight )
-        {
-            /* NB: use fixed point arithmetic with 10 bits */
-            inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
-                inf->slice;
-        }
-        else
-        {
-            /*
-             * Conversion between realtime utilisation and extrawieght:
-             * full (ie 100%) utilization is equivalent to 128 extraweight
-             */
-            inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
-        }
-    }
-
- check_extra_queues:
-    /* Adding a runnable domain to the right queue and removing blocked ones */
-    if ( sedf_runnable(d) )
-    {
-        /* Add according to score: weighted round robin */
-        if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
-            ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
-            extraq_add_sort_update(d, i, oldscore);
-    }
-    else
-    {
-        /* Remove this blocked domain from the waitq! */
-        __del_from_queue(d);
-        /* Make sure that we remove a blocked domain from the other
-         * extraq too. */
-        if ( i == EXTRA_PEN_Q )
-        {
-            if ( extraq_on(d, EXTRA_UTIL_Q) )
-                extraq_del(d, EXTRA_UTIL_Q);
-        }
-        else
-        {
-            if ( extraq_on(d, EXTRA_PEN_Q) )
-                extraq_del(d, EXTRA_PEN_Q);
-        }
-    }
-
-    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
-    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
-                 sedf_runnable(d)));
-}
-
-
-static struct task_slice sedf_do_extra_schedule(
-    s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
-{
-    struct task_slice   ret = { 0 };
-    struct sedf_vcpu_info *runinf;
-    ASSERT(end_xt > now);
-
-    /* Enough time left to use for extratime? */
-    if ( end_xt - now < EXTRA_QUANTUM )
-        goto return_idle;
-
-    if ( !list_empty(extraq[EXTRA_PEN_Q]) )
-    {
-        /*
-         * We still have elements on the level 0 extraq
-         * => let those run first!
-         */
-        runinf   = list_entry(extraq[EXTRA_PEN_Q]->next, 
-                              struct sedf_vcpu_info, extralist[EXTRA_PEN_Q]);
-        runinf->status |= EXTRA_RUN_PEN;
-        ret.task = runinf->vcpu;
-        ret.time = EXTRA_QUANTUM;
-#ifdef SEDF_STATS
-        runinf->pen_extra_slices++;
-#endif
-    }
-    else
-    {
-        if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
-        {
-            /* Use elements from the normal extraqueue */
-            runinf   = list_entry(extraq[EXTRA_UTIL_Q]->next,
-                                  struct sedf_vcpu_info,
-                                  extralist[EXTRA_UTIL_Q]);
-            runinf->status |= EXTRA_RUN_UTIL;
-            ret.task = runinf->vcpu;
-            ret.time = EXTRA_QUANTUM;
-        }
-        else
-            goto return_idle;
-    }
-
-    ASSERT(ret.time > 0);
-    ASSERT(sedf_runnable(ret.task));
-    return ret;
- 
- return_idle:
-    ret.task = IDLETASK(cpu);
-    ret.time = end_xt - now;
-    ASSERT(ret.time > 0);
-    ASSERT(sedf_runnable(ret.task));
-    return ret;
-}
-
-
 static int sedf_init(struct scheduler *ops)
 {
     struct sedf_priv_info *prv;
@@ -748,8 +444,6 @@ static struct task_slice sedf_do_schedule(
     struct list_head     *runq     = RUNQ(cpu);
     struct list_head     *waitq    = WAITQ(cpu);
     struct sedf_vcpu_info *inf     = EDOM_INFO(current);
-    struct list_head      *extraq[] = {
-        EXTRAQ(cpu, EXTRA_PEN_Q), EXTRAQ(cpu, EXTRA_UTIL_Q)};
     struct sedf_vcpu_info *runinf, *waitinf;
     struct task_slice      ret;
 
@@ -770,15 +464,7 @@ static struct task_slice sedf_do_schedule(
     if ( inf->status & SEDF_ASLEEP )
         inf->block_abs = now;
 
-    if ( unlikely(extra_runs(inf)) )
-    {
-        /* Special treatment of domains running in extra time */
-        desched_extra_dom(now, current);
-    }
-    else 
-    {
-        desched_edf_dom(now, current);
-    }
+    desched_edf_dom(now, current);
  check_waitq:
     update_queues(now, runq, waitq);
 
@@ -820,12 +506,9 @@ static struct task_slice sedf_do_schedule(
     else
     {
         waitinf  = list_entry(waitq->next,struct sedf_vcpu_info, list);
-        /*
-         * We could not find any suitable domain 
-         * => look for domains that are aware of extratime
-         */
-        ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
-                                     extraq, cpu);
+
+        ret.task = IDLETASK(cpu);
+        ret.time = PERIOD_BEGIN(waitinf) - now;
     }
 
     /*
@@ -833,11 +516,8 @@ static struct task_slice sedf_do_schedule(
      * still can happen!!!
      */
     if ( ret.time < 0)
-    {
         printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
                ret.time);
-        ret.time = EXTRA_QUANTUM;
-    }
 
     ret.migrated = 0;
 
@@ -848,7 +528,6 @@ static struct task_slice sedf_do_schedule(
     return ret;
 }
 
-
 static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
 {
     if ( is_idle_vcpu(d) )
@@ -864,13 +543,35 @@ static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
     {
         if ( __task_on_queue(d) )
             __del_from_queue(d);
-        if ( extraq_on(d, EXTRA_UTIL_Q) ) 
-            extraq_del(d, EXTRA_UTIL_Q);
-        if ( extraq_on(d, EXTRA_PEN_Q) )
-            extraq_del(d, EXTRA_PEN_Q);
     }
 }
 
+/*
+ * Compares two domains with respect to whether one is allowed to
+ * interrupt the other's execution.
+ * It returns true (!=0) if a switch to the other domain is good.
+ * Priority scheme is as follows:
+ *  EDF: early deadline > late deadline
+ */
+static inline int should_switch(struct vcpu *cur,
+                                struct vcpu *other,
+                                s_time_t now)
+{
+    struct sedf_vcpu_info *cur_inf, *other_inf;
+    cur_inf   = EDOM_INFO(cur);
+    other_inf = EDOM_INFO(other);
+
+    /* Always interrupt idle domain. */
+    if ( is_idle_vcpu(cur) )
+        return 1;
+
+    /* Check whether we need to make an earlier scheduling decision */
+    if ( PERIOD_BEGIN(other_inf) < 
+         CPU_INFO(other->processor)->current_slice_expires )
+        return 1;
+
+    return 0;
+}
 
 /*
  * This function wakes up a domain, i.e. moves them into the waitqueue
@@ -904,8 +605,6 @@ static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
  *
  *     -this also doesn't disturb scheduling, but might lead to the fact, that
  *      the domain can't finish it's workload in the period
- *     -in addition to that the domain can be treated prioritised when
- *      extratime is available
  *     -addition: experiments have shown that this may have a HUGE impact on
  *      performance of other domains, becaus it can lead to excessive context
  *      switches
@@ -931,10 +630,6 @@ static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
 *      DRB______D___URRRR___D...
  *                       (D) <- old deadline was here
  *     -problem: deadlines don't occur isochronous anymore
- *    Part 2c (Improved Atropos design)
- *     -when a domain unblocks it is given a very short period (=latency hint)
- *      and slice length scaled accordingly
- *     -both rise again to the original value (e.g. get doubled every period)
  *
  * 3. Unconservative (i.e. incorrect)
  *     -to boost the performance of I/O dependent domains it would be possible
@@ -944,136 +639,6 @@ static void sedf_sleep(const struct scheduler *ops, struct vcpu *d)
  *     -either behaviour can lead to missed deadlines in other domains as
  *      opposed to approaches 1,2a,2b
  */
-static void unblock_short_extra_support(
-    struct sedf_vcpu_info* inf, s_time_t now)
-{
-    /*
-     * This unblocking scheme tries to support the domain, by assigning it
-     * a priority in extratime distribution according to the loss of time
-     * in this slice due to blocking
-     */
-    s_time_t pen;
- 
-    /* No more realtime execution in this period! */
-    inf->deadl_abs += inf->period;
-    if ( likely(inf->block_abs) )
-    {
-        /* Treat blocked time as consumed by the domain */
-        /*inf->cputime += now - inf->block_abs;*/
-        /*
-         * Penalty is time the domain would have
-         * had if it continued to run.
-         */
-        pen = (inf->slice - inf->cputime);
-        if ( pen < 0 )
-            pen = 0;
-        /* Accumulate all penalties over the periods */
-        /*inf->short_block_lost_tot += pen;*/
-        /* Set penalty to the current value */
-        inf->short_block_lost_tot = pen;
-        /* Not sure which one is better.. but seems to work well... */
-  
-        if ( inf->short_block_lost_tot )
-        {
-            inf->score[0] = (inf->period << 10) /
-                inf->short_block_lost_tot;
-#ifdef SEDF_STATS
-            inf->pen_extra_blocks++;
-#endif
-            if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
-                /* Remove domain for possible resorting! */
-                extraq_del(inf->vcpu, EXTRA_PEN_Q);
-            else
-                /*
-                 * Remember that we want to be on the penalty q
-                 * so that we can continue when we (un-)block
-                 * in penalty-extratime
-                 */
-                inf->status |= EXTRA_WANT_PEN_Q;
-   
-            /* (re-)add domain to the penalty extraq */
-            extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
-        }
-    }
-
-    /* Give it a fresh slice in the next period! */
-    inf->cputime = 0;
-}
-
-
-static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now)
-{
-    /* Conservative 2b */
-
-    /* Treat the unblocking time as a start of a new period */
-    inf->deadl_abs = now + inf->period;
-    inf->cputime = 0;
-}
-
-
-#define DOMAIN_EDF   1
-#define DOMAIN_EXTRA_PEN  2
-#define DOMAIN_EXTRA_UTIL  3
-#define DOMAIN_IDLE   4
-static inline int get_run_type(struct vcpu* d)
-{
-    struct sedf_vcpu_info* inf = EDOM_INFO(d);
-    if (is_idle_vcpu(d))
-        return DOMAIN_IDLE;
-    if (inf->status & EXTRA_RUN_PEN)
-        return DOMAIN_EXTRA_PEN;
-    if (inf->status & EXTRA_RUN_UTIL)
-        return DOMAIN_EXTRA_UTIL;
-    return DOMAIN_EDF;
-}
-
-
-/*
- * Compares two domains in the relation of whether the one is allowed to
- * interrupt the others execution.
- * It returns true (!=0) if a switch to the other domain is good.
- * Current Priority scheme is as follows:
- *  EDF > L0 (penalty based) extra-time > 
- *  L1 (utilization) extra-time > idle-domain
- * In the same class priorities are assigned as following:
- *  EDF: early deadline > late deadline
- *  L0 extra-time: lower score > higher score
- */
-static inline int should_switch(struct vcpu *cur,
-                                struct vcpu *other,
-                                s_time_t now)
-{
-    struct sedf_vcpu_info *cur_inf, *other_inf;
-    cur_inf   = EDOM_INFO(cur);
-    other_inf = EDOM_INFO(other);
- 
-    /* Check whether we need to make an earlier scheduling decision */
-    if ( PERIOD_BEGIN(other_inf) < 
-         CPU_INFO(other->processor)->current_slice_expires )
-        return 1;
-
-    /* No timing-based switches need to be taken into account here */
-    switch ( get_run_type(cur) )
-    {
-    case DOMAIN_EDF:
-        /* Do not interrupt a running EDF domain */
-        return 0;
-    case DOMAIN_EXTRA_PEN:
-        /* Check whether we also want the L0 ex-q with lower score */
-        return ((other_inf->status & EXTRA_WANT_PEN_Q) &&
-                (other_inf->score[EXTRA_PEN_Q] < 
-                 cur_inf->score[EXTRA_PEN_Q]));
-    case DOMAIN_EXTRA_UTIL:
-        /* Check whether we want the L0 extraq. Don't
-         * switch if both domains want L1 extraq. */
-        return !!(other_inf->status & EXTRA_WANT_PEN_Q);
-    case DOMAIN_IDLE:
-        return 1;
-    }
-
-    return 1;
-}
-
 static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
 {
     s_time_t              now = NOW();
@@ -1087,8 +652,6 @@ static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
 
     ASSERT(!sedf_runnable(d));
     inf->status &= ~SEDF_ASLEEP;
-    ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
-    ASSERT(!extraq_on(d, EXTRA_PEN_Q));
  
     if ( unlikely(inf->deadl_abs == 0) )
     {
@@ -1100,62 +663,65 @@ static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
     inf->block_tot++;
 #endif
 
-    if ( unlikely(now < PERIOD_BEGIN(inf)) )
-    {
-        /* Unblocking in extra-time! */
-        if ( inf->status & EXTRA_WANT_PEN_Q )
+    if ( sedf_soft(d) )
+    {
+        /* Apply CBS rule
+         * Where:
+         *      c == Remaining server slice == (inf->slice - inf->cputime)
+         *      d == Server (vcpu) deadline  == inf->deadl_abs
+         *      r == Wake-up time of vcpu    == now
+         *      U == Server (vcpu) bandwidth == (inf->slice / inf->period)
+         *
+         * if c >= (d - r) * U, cross-multiplied to avoid integer division:
+         *      (slice - cputime) * period >= (deadl_abs - now) * slice
+         *
+         * If true, push deadline back by one period and refresh slice, else
+         * use current slice and deadline.
+         */
+        if ( ((inf->slice - inf->cputime) * inf->period) >=
+             ((inf->deadl_abs - now) * inf->slice) )
         {
-            /*
-             * We have a domain that wants compensation
-             * for block penalty and did just block in
-             * its compensation time. Give it another
-             * chance!
-             */
-            extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
+            /* Push back deadline by one period */
+            inf->deadl_abs += inf->period;
+            inf->cputime = 0;
         }
-        extraq_check_add_unblocked(d, 0);
-    }  
-    else
-    {  
+        
+        /* In CBS we don't care whether the period has begun;
+         * the task doesn't have to wait for its next period,
+         * because it will never request more than its slice
+         * in any given period.
+         */
+        __add_to_runqueue_sort(d);
+    }
+    else {
+        /* Task is a hard task, treat accordingly */
+#ifdef SEDF_STATS
         if ( now < inf->deadl_abs )
         {
             /* Short blocking */
-#ifdef SEDF_STATS
             inf->short_block_tot++;
-#endif
-            unblock_short_extra_support(inf, now);
-
-            extraq_check_add_unblocked(d, 1);
         }
         else
         {
-            /* Long unblocking */
-#ifdef SEDF_STATS
+            /* Long unblocking: the vcpu woke up after its deadline. */
             inf->long_block_tot++;
+        }
 #endif
-            unblock_long_cons_b(inf, now);
 
-            extraq_check_add_unblocked(d, 1);
-        }
+        if ( PERIOD_BEGIN(inf) > now )
+            __add_to_waitqueue_sort(d);
+        else
+            __add_to_runqueue_sort(d);
     }
-
-    if ( PERIOD_BEGIN(inf) > now )
-        __add_to_waitqueue_sort(d);
-    else
-        __add_to_runqueue_sort(d);
  
 #ifdef SEDF_STATS
     /* Do some statistics here... */
     if ( inf->block_abs != 0 )
     {
         inf->block_time_tot += now - inf->block_abs;
-        inf->penalty_time_tot +=
-            PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
     }
 #endif
 
-    /* Sanity check: make sure each extra-aware domain IS on the util-q! */
-    ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
     ASSERT(__task_on_queue(d));
     /*
      * Check whether the awakened task needs to invoke the do_schedule
@@ -1170,35 +736,27 @@ static void sedf_wake(const struct scheduler *ops, struct vcpu *d)
         cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
 }
 
-
 /* Print a lot of useful information about a domains in the system */
 static void sedf_dump_domain(struct vcpu *d)
 {
     printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
            d->is_running ? 'T':'F');
-    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu"
-           " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
-           EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
-           EDOM_INFO(d)->weight,
-           EDOM_INFO(d)->score[EXTRA_UTIL_Q],
-           (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
-           EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
+    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64,
+           EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs);
     
 #ifdef SEDF_STATS
-    if ( EDOM_INFO(d)->block_time_tot != 0 )
-        printk(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
-               EDOM_INFO(d)->block_time_tot);
+    printk(" m=%u mt=%"PRIu64"o=%u ot=%"PRIu64, 
+           EDOM_INFO(d)->miss_tot, EDOM_INFO(d)->miss_time, 
+           EDOM_INFO(d)->over_tot, EDOM_INFO(d)->over_time);
+
     if ( EDOM_INFO(d)->block_tot != 0 )
-        printk("\n   blks=%u sh=%u (%u%%) (shex=%i "\
-               "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
+        printk("\n   blks=%u sh=%u (%u%%) "\
+               "l=%u (%u%%) avg: b=%"PRIu64,
                EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
                (EDOM_INFO(d)->short_block_tot * 100) / EDOM_INFO(d)->block_tot,
-               EDOM_INFO(d)->pen_extra_blocks,
-               EDOM_INFO(d)->pen_extra_slices,
                EDOM_INFO(d)->long_block_tot,
                (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
-               (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
-               (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
+               (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot);
 #endif
     printk("\n");
 }
@@ -1234,30 +792,6 @@ static void sedf_dump_cpu_state(const struct scheduler *ops, int i)
         sedf_dump_domain(d_inf->vcpu);
     }
  
-    queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
-    printk("\nEXTRAQ (penalty) rq %lx   n: %lx, p: %lx\n",
-           (unsigned long)queue, (unsigned long) queue->next,
-           (unsigned long) queue->prev);
-    list_for_each_safe ( list, tmp, queue )
-    {
-        d_inf = list_entry(list, struct sedf_vcpu_info,
-                           extralist[EXTRA_PEN_Q]);
-        printk("%3d: ",loop++);
-        sedf_dump_domain(d_inf->vcpu);
-    }
- 
-    queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
-    printk("\nEXTRAQ (utilization) rq %lx   n: %lx, p: %lx\n",
-           (unsigned long)queue, (unsigned long) queue->next,
-           (unsigned long) queue->prev);
-    list_for_each_safe ( list, tmp, queue )
-    {
-        d_inf = list_entry(list, struct sedf_vcpu_info,
-                           extralist[EXTRA_UTIL_Q]);
-        printk("%3d: ",loop++);
-        sedf_dump_domain(d_inf->vcpu);
-    }
- 
     loop = 0;
     printk("\nnot on Q\n");
 
@@ -1279,199 +813,69 @@ static void sedf_dump_cpu_state(const struct scheduler *ops, int i)
 }
 
 
-/* Adjusts periods and slices of the domains accordingly to their weights */
-static int sedf_adjust_weights(struct cpupool *c, int nr_cpus, int *sumw, s_time_t *sumt)
-{
-    struct vcpu *p;
-    struct domain      *d;
-    unsigned int        cpu;
-
-    /*
-     * Sum across all weights. Notice that no runq locking is needed
-     * here: the caller holds sedf_priv_info.lock and we're not changing
-     * anything that is accessed during scheduling.
-     */
-    rcu_read_lock(&domlist_read_lock);
-    for_each_domain_in_cpupool( d, c )
-    {
-        for_each_vcpu( d, p )
-        {
-            if ( (cpu = p->processor) >= nr_cpus )
-                continue;
-
-            if ( EDOM_INFO(p)->weight )
-            {
-                sumw[cpu] += EDOM_INFO(p)->weight;
-            }
-            else
-            {
-                /*
-                 * Don't modify domains who don't have a weight, but sum
-                 * up the time they need, projected to a WEIGHT_PERIOD,
-                 * so that this time is not given to the weight-driven
-                 *  domains
-                 */
-
-                /* Check for overflows */
-                ASSERT((WEIGHT_PERIOD < ULONG_MAX) 
-                       && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
-                sumt[cpu] += 
-                    (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) / 
-                    EDOM_INFO(p)->period_orig;
-            }
-        }
-    }
-    rcu_read_unlock(&domlist_read_lock);
-
-    /*
-     * Adjust all slices (and periods) to the new weight. Unlike above, we
-     * need to take thr runq lock for the various VCPUs: we're modyfing
-     * slice and period which are referenced during scheduling.
-     */
-    rcu_read_lock(&domlist_read_lock);
-    for_each_domain_in_cpupool( d, c )
-    {
-        for_each_vcpu ( d, p )
-        {
-            if ( (cpu = p->processor) >= nr_cpus )
-                continue;
-            if ( EDOM_INFO(p)->weight )
-            {
-                /* Interrupts already off */
-                spinlock_t *lock = vcpu_schedule_lock(p);
-
-                EDOM_INFO(p)->period_orig = 
-                    EDOM_INFO(p)->period  = WEIGHT_PERIOD;
-                EDOM_INFO(p)->slice_orig  =
-                    EDOM_INFO(p)->slice   = 
-                    (EDOM_INFO(p)->weight *
-                     (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu];
-
-                vcpu_schedule_unlock(lock, p);
-            }
-        }
-    }
-    rcu_read_unlock(&domlist_read_lock);
-
-    return 0;
-}
-
-
 /* Set or fetch domain scheduling parameters */
 static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
 {
     struct sedf_priv_info *prv = SEDF_PRIV(ops);
     unsigned long flags;
-    unsigned int nr_cpus = cpumask_last(&cpu_online_map) + 1;
-    int *sumw = xzalloc_array(int, nr_cpus);
-    s_time_t *sumt = xzalloc_array(s_time_t, nr_cpus);
+    s_time_t now = NOW();
     struct vcpu *v;
     int rc = 0;
 
     /*
      * Serialize against the pluggable scheduler lock to protect from
      * concurrent updates. We need to take the runq lock for the VCPUs
-     * as well, since we are touching extraweight, weight, slice and
-     * period. As in sched_credit2.c, runq locks nest inside the
-     * pluggable scheduler lock.
+     * as well, since we are touching slice and period. 
+     *
+     * As in sched_credit2.c, runq locks nest inside the  pluggable scheduler
+     * lock.
      */
     spin_lock_irqsave(&prv->lock, flags);
 
     if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
     {
-        /*
-         * These are used in sedf_adjust_weights() but have to be allocated in
-         * this function, as we need to avoid nesting xmem_pool_alloc's lock
-         * within our prv->lock.
-         */
-        if ( !sumw || !sumt )
+        /* Check for sane parameters */
+        if ( !op->u.sedf.period )
         {
-            /* Check for errors here, the _getinfo branch doesn't care */
-            rc = -ENOMEM;
+            printk("Period Not set");
+            rc = -EINVAL;
             goto out;
         }
 
-        /* Check for sane parameters */
-        if ( !op->u.sedf.period && !op->u.sedf.weight )
+        /*
+         * Sanity checking
+         */
+        if ( (op->u.sedf.period > PERIOD_MAX) ||
+             (op->u.sedf.period < PERIOD_MIN) ||
+             (op->u.sedf.slice  > op->u.sedf.period) ||
+             (op->u.sedf.slice  < SLICE_MIN) )
         {
+            printk("Insane Parameters: period: %lu\tbudget: %lu\n", 
op->u.sedf.period, op->u.sedf.slice);
             rc = -EINVAL;
             goto out;
         }
 
-        if ( op->u.sedf.weight )
+        /* Time-driven domains */
+        for_each_vcpu ( p, v )
         {
-            if ( (op->u.sedf.extratime & EXTRA_AWARE) &&
-                 (!op->u.sedf.period) )
+            spinlock_t *lock = vcpu_schedule_lock(v);
+
+            EDOM_INFO(v)->period  = op->u.sedf.period;
+            EDOM_INFO(v)->slice   = op->u.sedf.slice;
+            if(op->u.sedf.soft)
             {
-                /* Weight-driven domains with extratime only */
-                for_each_vcpu ( p, v )
-                {
-                    /* (Here and everywhere in the following) IRQs are already off,
-                     * hence vcpu_spin_lock() is the one. */
-                    spinlock_t *lock = vcpu_schedule_lock(v);
-
-                    EDOM_INFO(v)->extraweight = op->u.sedf.weight;
-                    EDOM_INFO(v)->weight = 0;
-                    EDOM_INFO(v)->slice = 0;
-                    EDOM_INFO(v)->period = WEIGHT_PERIOD;
-                    vcpu_schedule_unlock(lock, v);
-                }
+                EDOM_INFO(v)->status |= SEDF_SOFT_TASK;
             }
             else
             {
-                /* Weight-driven domains with real-time execution */
-                for_each_vcpu ( p, v )
+                /* Correct deadline when switching from a soft to hard vcpu */
+                if( unlikely((EDOM_INFO(v)->deadl_abs - now) >= (EDOM_INFO(v)->period * 3)) )
                 {
-                    spinlock_t *lock = vcpu_schedule_lock(v);
-
-                    EDOM_INFO(v)->weight = op->u.sedf.weight;
-                    vcpu_schedule_unlock(lock, v);
+                    EDOM_INFO(v)->deadl_abs = (now - EDOM_INFO(v)->cputime) + (2 * EDOM_INFO(v)->period);
                 }
+                
+                EDOM_INFO(v)->status &= (~SEDF_SOFT_TASK);
             }
-        }
-        else
-        {
-            /*
-             * Sanity checking: note that disabling extra weight requires
-             * that we set a non-zero slice.
-             */
-            if ( (op->u.sedf.period > PERIOD_MAX) ||
-                 (op->u.sedf.period < PERIOD_MIN) ||
-                 (op->u.sedf.slice  > op->u.sedf.period) ||
-                 (op->u.sedf.slice  < SLICE_MIN) )
-            {
-                rc = -EINVAL;
-                goto out;
-            }
-
-            /* Time-driven domains */
-            for_each_vcpu ( p, v )
-            {
-                spinlock_t *lock = vcpu_schedule_lock(v);
-
-                EDOM_INFO(v)->weight = 0;
-                EDOM_INFO(v)->extraweight = 0;
-                EDOM_INFO(v)->period_orig = 
-                    EDOM_INFO(v)->period  = op->u.sedf.period;
-                EDOM_INFO(v)->slice_orig  = 
-                    EDOM_INFO(v)->slice   = op->u.sedf.slice;
-                vcpu_schedule_unlock(lock, v);
-            }
-        }
-
-        rc = sedf_adjust_weights(p->cpupool, nr_cpus, sumw, sumt);
-        if ( rc )
-            goto out;
-
-        for_each_vcpu ( p, v )
-        {
-            spinlock_t *lock = vcpu_schedule_lock(v);
-
-            EDOM_INFO(v)->status  = 
-                (EDOM_INFO(v)->status &
-                 ~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE);
-            EDOM_INFO(v)->latency = op->u.sedf.latency;
-            extraq_check(v);
             vcpu_schedule_unlock(lock, v);
         }
     }
@@ -1485,17 +889,12 @@ static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen
 
         op->u.sedf.period    = EDOM_INFO(p->vcpu[0])->period;
         op->u.sedf.slice     = EDOM_INFO(p->vcpu[0])->slice;
-        op->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE;
-        op->u.sedf.latency   = EDOM_INFO(p->vcpu[0])->latency;
-        op->u.sedf.weight    = EDOM_INFO(p->vcpu[0])->weight;
+        op->u.sedf.soft      = sedf_soft(p->vcpu[0]);
     }
 
 out:
     spin_unlock_irqrestore(&prv->lock, flags);
 
-    xfree(sumt);
-    xfree(sumw);
-
     return rc;
 }
 
-- 
1.7.9.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

