Re: [Xen-devel] [PATCH v5][RFC] xen: sched: convert RTDS from time to event driven model
Hey Dario,

We are aiming for the next release and would appreciate it if you could
leave some comments on this version. Thanks.

Tianyang

On 2/8/2016 11:33 PM, Tianyang Chen wrote:

Changes since v4:
  removed unnecessary replenishment queue checks in vcpu_wake()
  extended replq_remove() to all cases in vcpu_sleep()
  used _deadline_queue_insert() helper function for both queues
  _replq_insert() and _replq_remove() program timer internally

Changes since v3:
  removed running queue.
  added repl queue to keep track of repl events.
  timer is now per scheduler.
  timer is init on a valid cpu in a cpupool.

Signed-off-by: Tianyang Chen <tiche@xxxxxxxxxxxxxx>
Signed-off-by: Meng Xu <mengxu@xxxxxxxxxxxxx>
Signed-off-by: Dagaen Golomb <dgolomb@xxxxxxxxxxxxxx>
---
 xen/common/sched_rt.c | 337 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 251 insertions(+), 86 deletions(-)
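In case it helps review, here is a rough standalone sketch (plain userspace C, not Xen code, and not part of this patch) of the idea the series implements: replenishment events sit on a list kept sorted by deadline, a single one-shot timer is armed only for the earliest event, and the timer handler replenishes whatever is due and re-programs the timer for the next event. All names below (struct event, queue_insert, arm_timer, handler) are made up for illustration; locking, budget accounting and the runq/depletedq handling of the real code are left out.

/* Toy model only: a deadline-sorted replenishment queue driving one one-shot timer. */
#include <stdio.h>

struct event {
    long deadline;              /* absolute time of the next replenishment */
    long period;                /* distance between two replenishments */
    struct event *next;         /* singly linked list, kept sorted by deadline */
};

static struct event *replq;     /* head is always the earliest event */
static long timer_expires = -1; /* models the single one-shot timer */

/* Models re-arming the one-shot timer (set_timer() in the real code). */
static void arm_timer(long t)
{
    timer_expires = t;
    printf("timer armed for t=%ld\n", t);
}

/* Deadline-ordered insert, the role _deadline_queue_insert() plays. */
static void queue_insert(struct event *e)
{
    struct event **pp = &replq;

    while ( *pp && (*pp)->deadline <= e->deadline )
        pp = &(*pp)->next;
    e->next = *pp;
    *pp = e;

    /* Re-arm only if this event became the earliest one (or nothing was armed). */
    if ( replq == e || timer_expires < 0 )
        arm_timer(replq->deadline);
}

/* Models repl_handler(): replenish everything that is due, then re-arm. */
static void handler(long now)
{
    while ( replq && replq->deadline <= now )
    {
        struct event *e = replq;

        replq = e->next;            /* pop the earliest event */
        e->deadline += e->period;   /* "replenish": push its deadline one period out */
        queue_insert(e);            /* queue its next replenishment */
    }
    if ( replq )
        arm_timer(replq->deadline); /* program the timer for the next event */
}

int main(void)
{
    struct event a = { 5, 10, NULL }, b = { 12, 10, NULL };

    queue_insert(&a);   /* arms the timer for t=5 */
    queue_insert(&b);   /* later deadline, timer left alone */
    handler(6);         /* only 'a' is due; timer re-armed for t=12 */
    return 0;
}

The patch below does the equivalent with __replq_insert()/__replq_remove() keeping the ordering and prv->repl_timer as the single timer.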
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 2e5430f..1f0bb7b 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@
 #include <xen/delay.h>
 #include <xen/event.h>
 #include <xen/time.h>
+#include <xen/timer.h>
 #include <xen/perfc.h>
 #include <xen/sched-if.h>
 #include <xen/softirq.h>
@@ -87,7 +88,7 @@
 #define RTDS_DEFAULT_BUDGET     (MICROSECS(4000))
 
 #define UPDATE_LIMIT_SHIFT      10
-#define MAX_SCHEDULE            (MILLISECS(1))
+
 /*
  * Flags
  */
@@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch;
  */
 static unsigned int nr_rt_ops;
 
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
+/* checks if a timer is active or not */
+bool_t active_timer(struct timer* t);
+
 /*
  * Systme-wide private data, include global RunQueue/DepletedQ
  * Global lock is referenced by schedule_data.schedule_lock from all
@@ -152,7 +159,9 @@ struct rt_private {
     struct list_head sdom;      /* list of availalbe domains, used for dump */
     struct list_head runq;      /* ordered list of runnable vcpus */
     struct list_head depletedq; /* unordered list of depleted vcpus */
+    struct list_head replq;     /* ordered list of vcpus that need replenishment */
     cpumask_t tickled;          /* cpus been tickled */
+    struct timer *repl_timer;   /* replenishment timer */
 };
 
 /*
@@ -160,6 +169,7 @@ struct rt_private {
  */
 struct rt_vcpu {
     struct list_head q_elem;    /* on the runq/depletedq list */
+    struct list_head replq_elem;/* on the repl event list */
 
     /* Up-pointers */
     struct rt_dom *sdom;
@@ -213,8 +223,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops)
     return &rt_priv(ops)->depletedq;
 }
 
+static inline struct list_head *rt_replq(const struct scheduler *ops)
+{
+    return &rt_priv(ops)->replq;
+}
+
 /*
- * Queue helper functions for runq and depletedq
+ * Queue helper functions for runq, depletedq
+ * and replenishment event queue
  */
 static int
 __vcpu_on_q(const struct rt_vcpu *svc)
@@ -228,6 +244,18 @@ __q_elem(struct list_head *elem)
     return list_entry(elem, struct rt_vcpu, q_elem);
 }
 
+static struct rt_vcpu *
+__replq_elem(struct list_head *elem)
+{
+    return list_entry(elem, struct rt_vcpu, replq_elem);
+}
+
+static int
+__vcpu_on_replq(const struct rt_vcpu *svc)
+{
+    return !list_empty(&svc->replq_elem);
+}
+
 /*
  * Debug related code, dump vcpu/cpu information
  */
@@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
 static void
 rt_dump(const struct scheduler *ops)
 {
-    struct list_head *runq, *depletedq, *iter;
+    struct list_head *runq, *depletedq, *replq, *iter;
     struct rt_private *prv = rt_priv(ops);
     struct rt_vcpu *svc;
     struct rt_dom *sdom;
@@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops)
 
     runq = rt_runq(ops);
     depletedq = rt_depletedq(ops);
+    replq = rt_replq(ops);
 
     printk("Global RunQueue info:\n");
     list_for_each( iter, runq )
@@ -316,6 +345,13 @@ rt_dump(const struct scheduler *ops)
         rt_dump_vcpu(ops, svc);
     }
 
+    printk("Global Replenishment Event info:\n");
+    list_for_each( iter, replq )
+    {
+        svc = __replq_elem(iter);
+        rt_dump_vcpu(ops, svc);
+    }
+
     printk("Domain info:\n");
     list_for_each( iter, &prv->sdom )
     {
@@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc)
 }
 
 /*
+ * Removing a vcpu from the replenishment queue could
+ * re-program the timer for the next replenishment event
+ * if the timer is currently active
+ */
+static inline void
+__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+    struct rt_private *prv = rt_priv(ops);
+    struct list_head *replq = rt_replq(ops);
+    struct timer* repl_timer = prv->repl_timer;
+
+    if ( __vcpu_on_replq(svc) )
+    {
+        /*
+         * disarm the timer if removing the first replenishment event
+         * which is going to happen next
+         */
+        if( active_timer(repl_timer) )
+        {
+            struct rt_vcpu *next_repl = __replq_elem(replq->next);
+
+            if( next_repl->cur_deadline == svc->cur_deadline )
+                repl_timer->expires = 0;
+
+            list_del_init(&svc->replq_elem);
+
+            /* re-arm the timer for the next replenishment event */
+            if( !list_empty(replq) )
+            {
+                struct rt_vcpu *svc_next = __replq_elem(replq->next);
+                set_timer(repl_timer, svc_next->cur_deadline);
+            }
+        }
+
+        else
+            list_del_init(&svc->replq_elem);
+    }
+}
+
+/*
+ * An utility function that inserts a vcpu to a
+ * queue based on certain order (EDF)
+ */
+static void
+_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem),
+    struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue)
+{
+    struct list_head *iter;
+
+    list_for_each(iter, queue)
+    {
+        struct rt_vcpu * iter_svc = (*_get_q_elem)(iter);
+        if ( svc->cur_deadline <= iter_svc->cur_deadline )
+            break;
+    }
+
+    list_add_tail(elem, iter);
+}
+
+/*
  * Insert svc with budget in RunQ according to EDF:
  * vcpus with smaller deadlines go first.
  * Insert svc without budget in DepletedQ unsorted;
@@ -397,7 +493,6 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
 {
     struct rt_private *prv = rt_priv(ops);
     struct list_head *runq = rt_runq(ops);
-    struct list_head *iter;
 
     ASSERT( spin_is_locked(&prv->lock) );
 
@@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
 
     /* add svc to runq if svc still has budget */
     if ( svc->cur_budget > 0 )
-    {
-        list_for_each(iter, runq)
-        {
-            struct rt_vcpu * iter_svc = __q_elem(iter);
-            if ( svc->cur_deadline <= iter_svc->cur_deadline )
-                break;
-        }
-        list_add_tail(&svc->q_elem, iter);
-    }
+        _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq);
     else
-    {
         list_add(&svc->q_elem, &prv->depletedq);
-    }
 }
 
 /*
+ * Insert svc into the repl even list:
+ * vcpus that needs to be repl earlier go first.
+ * scheduler private lock serializes this operation
+ * it could re-program the timer if it fires later than
+ * this vcpu's cur_deadline. Also, this is used to program
+ * the timer for the first time.
+ */
+static void
+__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+    struct list_head *replq = rt_replq(ops);
+    struct rt_private *prv = rt_priv(ops);
+    struct timer *repl_timer = prv->repl_timer;
+
+    ASSERT( !__vcpu_on_replq(svc) );
+
+    _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq);
+
+    if( repl_timer->expires == 0 ||
+        ( active_timer(repl_timer) && repl_timer->expires > svc->cur_deadline ) )
+        set_timer(repl_timer,svc->cur_deadline);
+}
+
+
+/*
  * Init/Free related code
  */
 static int
@@ -449,11 +559,18 @@ rt_init(struct scheduler *ops)
     INIT_LIST_HEAD(&prv->sdom);
     INIT_LIST_HEAD(&prv->runq);
     INIT_LIST_HEAD(&prv->depletedq);
+    INIT_LIST_HEAD(&prv->replq);
 
     cpumask_clear(&prv->tickled);
 
     ops->sched_data = prv;
 
+    /*
+     * The timer initialization will happen later when
+     * the first pcpu is added to this pool in alloc_pdata
+     */
+    prv->repl_timer = NULL;
+
     return 0;
 
  no_mem:
@@ -473,6 +590,10 @@ rt_deinit(const struct scheduler *ops)
         xfree(_cpumask_scratch);
         _cpumask_scratch = NULL;
     }
+
+    kill_timer(prv->repl_timer);
+    xfree(prv->repl_timer);
+
     xfree(prv);
 }
 
@@ -493,6 +614,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu)
     if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) )
         return NULL;
 
+    if( prv->repl_timer == NULL )
+    {
+        /* allocate the timer on the first cpu of this pool */
+        prv->repl_timer = xzalloc(struct timer);
+
+        if(prv->repl_timer == NULL )
+            return NULL;
+
+        init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu);
+    }
+
     /* 1 indicates alloc. succeed in schedule.c */
     return (void *)1;
 }
@@ -586,6 +718,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
         return NULL;
 
     INIT_LIST_HEAD(&svc->q_elem);
+    INIT_LIST_HEAD(&svc->replq_elem);
     svc->flags = 0U;
     svc->sdom = dd;
     svc->vcpu = vc;
@@ -609,7 +742,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv)
 }
 
 /*
- * This function is called in sched_move_domain() in schedule.c
+ * It is called in sched_move_domain() and sched_init_vcpu
+ * in schedule.c
  * When move a domain to a new cpupool.
 * It inserts vcpus of moving domain to the scheduler's RunQ in
 * dest. cpupool.
@@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
     lock = vcpu_schedule_lock_irq(vc);
     if ( __vcpu_on_q(svc) )
         __q_remove(svc);
+
+    if( __vcpu_on_replq(svc) )
+        __replq_remove(ops,svc);
+
     vcpu_schedule_unlock_irq(lock, vc);
 }
 
@@ -785,44 +923,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask)
 }
 
 /*
- * Update vcpu's budget and
- * sort runq by insert the modifed vcpu back to runq
- * lock is grabbed before calling this function
- */
-static void
-__repl_update(const struct scheduler *ops, s_time_t now)
-{
-    struct list_head *runq = rt_runq(ops);
-    struct list_head *depletedq = rt_depletedq(ops);
-    struct list_head *iter;
-    struct list_head *tmp;
-    struct rt_vcpu *svc = NULL;
-
-    list_for_each_safe(iter, tmp, runq)
-    {
-        svc = __q_elem(iter);
-        if ( now < svc->cur_deadline )
-            break;
-
-        rt_update_deadline(now, svc);
-        /* reinsert the vcpu if its deadline is updated */
-        __q_remove(svc);
-        __runq_insert(ops, svc);
-    }
-
-    list_for_each_safe(iter, tmp, depletedq)
-    {
-        svc = __q_elem(iter);
-        if ( now >= svc->cur_deadline )
-        {
-            rt_update_deadline(now, svc);
-            __q_remove(svc); /* remove from depleted queue */
-            __runq_insert(ops, svc); /* add to runq */
-        }
-    }
-}
-
-/*
  * schedule function for rt scheduler.
 * The lock is already grabbed in schedule.c, no need to lock here
 */
@@ -841,7 +941,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
     /* burn_budget would return for IDLE VCPU */
     burn_budget(ops, scurr, now);
 
-    __repl_update(ops, now);
 
     if ( tasklet_work_scheduled )
     {
@@ -868,6 +967,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
         set_bit(__RTDS_delayed_runq_add, &scurr->flags);
 
     snext->last_start = now;
+
+    ret.time = -1; /* if an idle vcpu is picked */
     if ( !is_idle_vcpu(snext->vcpu) )
     {
         if ( snext != scurr )
@@ -880,9 +981,11 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
             snext->vcpu->processor = cpu;
             ret.migrated = 1;
         }
+
+        ret.time = snext->budget; /* invoke the scheduler next time */
+
     }
 
-    ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
     ret.task = snext->vcpu;
 
     /* TRACE */
@@ -914,7 +1017,7 @@ static void
 rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
 {
     struct rt_vcpu * const svc = rt_vcpu(vc);
-
+
     BUG_ON( is_idle_vcpu(vc) );
     SCHED_STAT_CRANK(vcpu_sleep);
 
@@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
         __q_remove(svc);
    else if ( svc->flags & RTDS_delayed_runq_add )
         clear_bit(__RTDS_delayed_runq_add, &svc->flags);
+
+    if( __vcpu_on_replq(svc) )
+        __replq_remove(ops, svc);
 }
 
 /*
@@ -1026,10 +1132,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
 {
     struct rt_vcpu * const svc = rt_vcpu(vc);
     s_time_t now = NOW();
-    struct rt_private *prv = rt_priv(ops);
-    struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
-    struct rt_dom *sdom = NULL;
-    cpumask_t *online;
 
     BUG_ON( is_idle_vcpu(vc) );
 
@@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
     else
         SCHED_STAT_CRANK(vcpu_wake_not_runnable);
 
+    /* budget repl here is needed before inserting back to runq. If so,
+     * it should be re-inserted back to the replenishment queue.
+     */
+    if ( now >= svc->cur_deadline)
+    {
+        rt_update_deadline(now, svc);
+        __replq_remove(ops, svc);
+    }
+
+    if( !__vcpu_on_replq(svc) )
+        __replq_insert(ops, svc);
+
     /* If context hasn't been saved for this vcpu yet, we can't put it on
      * the Runqueue/DepletedQ. Instead, we set a flag so that it will be
     * put on the Runqueue/DepletedQ after the context has been saved.
@@ -1061,22 +1175,10 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
         return;
     }
 
-    if ( now >= svc->cur_deadline)
-        rt_update_deadline(now, svc);
-
     /* insert svc to runq/depletedq because svc is not in queue now */
     __runq_insert(ops, svc);
 
-    __repl_update(ops, now);
-
-    ASSERT(!list_empty(&prv->sdom));
-    sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
-    online = cpupool_domain_cpumask(sdom->dom);
-    snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */
-
-    runq_tickle(ops, snext);
-
-    return;
+    runq_tickle(ops, svc);
 }
 
 /*
@@ -1087,10 +1189,6 @@ static void
 rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
 {
     struct rt_vcpu *svc = rt_vcpu(vc);
-    struct rt_vcpu *snext = NULL;
-    struct rt_dom *sdom = NULL;
-    struct rt_private *prv = rt_priv(ops);
-    cpumask_t *online;
     spinlock_t *lock = vcpu_schedule_lock_irq(vc);
 
     clear_bit(__RTDS_scheduled, &svc->flags);
@@ -1102,14 +1200,7 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
          likely(vcpu_runnable(vc)) )
     {
         __runq_insert(ops, svc);
-        __repl_update(ops, NOW());
-
-        ASSERT(!list_empty(&prv->sdom));
-        sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
-        online = cpupool_domain_cpumask(sdom->dom);
-        snext = __runq_pick(ops, online); /* pick snext from ALL cpus */
-
-        runq_tickle(ops, snext);
+        runq_tickle(ops, svc);
     }
 out:
     vcpu_schedule_unlock_irq(lock, vc);
@@ -1168,6 +1259,80 @@ rt_dom_cntl(
     return rc;
 }
 
+/*
+ * The replenishment timer handler picks vcpus
+ * from the replq and does the actual replenishment
+ */
+static void repl_handler(void *data){
+    unsigned long flags;
+    s_time_t now = NOW();
+    struct scheduler *ops = data;
+    struct rt_private *prv = rt_priv(ops);
+    struct list_head *replq = rt_replq(ops);
+    struct timer *repl_timer = prv->repl_timer;
+    struct list_head *iter, *tmp;
+    struct rt_vcpu *svc = NULL;
+
+    spin_lock_irqsave(&prv->lock, flags);
+
+    stop_timer(repl_timer);
+
+    list_for_each_safe(iter, tmp, replq)
+    {
+        svc = __replq_elem(iter);
+
+        if ( now >= svc->cur_deadline )
+        {
+            rt_update_deadline(now, svc);
+
+            /*
+             * when the replenishment happens
+             * svc is either on a pcpu or on
+             * runq/depletedq
+             */
+            if( __vcpu_on_q(svc) )
+            {
+                /* put back to runq */
+                __q_remove(svc);
+                __runq_insert(ops, svc);
+            }
+
+            /*
+             * tickle regardless where it's at
+             * because a running vcpu could have
+             * a later deadline than others after
+             * replenishment
+             */
+            runq_tickle(ops, svc);
+
+            /* update replenishment event queue */
+            __replq_remove(ops, svc);
+            __replq_insert(ops, svc);
+        }
+
+        else
+            break;
+    }
+
+    /*
+     * use the vcpu that's on the top
+     * or else don't program the timer
+     */
+    if( !list_empty(replq) )
+        set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline);
+
+    spin_unlock_irqrestore(&prv->lock, flags);
+
+}
+
+/* checks if a timer has been stopped or not */
+bool_t active_timer(struct timer *timer)
+{
+    ASSERT(timer->status >= TIMER_STATUS_inactive);
+    ASSERT(timer->status <= TIMER_STATUS_in_list);
+    return (timer->status >= TIMER_STATUS_in_heap);
+}
+
 static struct rt_private _rt_priv;
 
 static const struct scheduler sched_rtds_def = {

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel