
Re: [Xen-devel] [PATCH 03/11] qspinlock: Add pending bit



On Sun, Jun 15, 2014 at 02:47:00PM +0200, Peter Zijlstra wrote:
> Because the qspinlock needs to touch a second cacheline; add a pending
> bit and allow a single in-word spinner before we punt to the second
> cacheline.

Could you add this to the description please:

And by the second cacheline we mean the local 'node', that is:
mcs_nodes[0] and mcs_nodes[idx].

Perhaps it might also be better to split this out into the header file,
as this is not really meant to be slowpath code - but rather a
pre-slow-path "let's try one more cmpxchg in case the lock holder
has just released the lock" step.

So something like:

diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index e8a7ae8..29cc9c7 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -75,11 +75,21 @@ extern void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val);
  */
 static __always_inline void queue_spin_lock(struct qspinlock *lock)
 {
-       u32 val;
+       u32 val, new, old;
 
        val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
        if (likely(val == 0))
                return;
+
+       /* One more attempt - but if we fail mark it as pending. */
+       if (val == _Q_LOCKED_VAL) {
+               new = _Q_LOCKED_VAL | _Q_PENDING_VAL;
+
+               old = atomic_cmpxchg(&lock->val, val, new);
+               if (old == _Q_LOCKED_VAL) /* YEEY! */
+                       return;
+               val = old;
+       }
        queue_spin_lock_slowpath(lock, val);
 }

and then the slowpath would preserve most of the old logic path
(with the pending bit handling)?
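
Where by "the pending bit handling" I mean the wait-for-owner and claim
steps. Condensed from the patch below (the cmpxchg retry on setting the
pending bit and the queue fallback omitted), that part is roughly:

        /* 0,0,1 -> 0,1,1 ; set the pending bit behind the current owner */
        old = atomic_cmpxchg(&lock->val, _Q_LOCKED_VAL,
                             _Q_LOCKED_VAL | _Q_PENDING_VAL);

        /* *,1,1 -> *,1,0 ; wait for the owner to drop the lock byte */
        while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
                cpu_relax();

        /* *,1,0 -> *,0,1 ; clear pending and take ownership */
        for (;;) {
                new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
                old = atomic_cmpxchg(&lock->val, val, new);
                if (old == val)
                        break;
                val = old;
        }

That is the part I would expect to stay in the slowpath.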

 
> 
> Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> ---
>  include/asm-generic/qspinlock_types.h |   12 ++-
>  kernel/locking/qspinlock.c            |  109 +++++++++++++++++++++++++++-------
>  2 files changed, 97 insertions(+), 24 deletions(-)
> 
> --- a/include/asm-generic/qspinlock_types.h
> +++ b/include/asm-generic/qspinlock_types.h
> @@ -39,8 +39,9 @@ typedef struct qspinlock {
>   * Bitfields in the atomic value:
>   *
>   *  0- 7: locked byte
> - *  8- 9: tail index
> - * 10-31: tail cpu (+1)
> + *     8: pending
> + *  9-10: tail index
> + * 11-31: tail cpu (+1)
>   */
>  #define      _Q_SET_MASK(type)       (((1U << _Q_ ## type ## _BITS) - 1)\
>                                     << _Q_ ## type ## _OFFSET)
> @@ -48,7 +49,11 @@ typedef struct qspinlock {
>  #define _Q_LOCKED_BITS               8
>  #define _Q_LOCKED_MASK               _Q_SET_MASK(LOCKED)
>  
> -#define _Q_TAIL_IDX_OFFSET   (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
> +#define _Q_PENDING_OFFSET    (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
> +#define _Q_PENDING_BITS              1
> +#define _Q_PENDING_MASK              _Q_SET_MASK(PENDING)
> +
> +#define _Q_TAIL_IDX_OFFSET   (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
>  #define _Q_TAIL_IDX_BITS     2
>  #define _Q_TAIL_IDX_MASK     _Q_SET_MASK(TAIL_IDX)
>  
> @@ -57,5 +62,6 @@ typedef struct qspinlock {
>  #define _Q_TAIL_CPU_MASK     _Q_SET_MASK(TAIL_CPU)
>  
>  #define _Q_LOCKED_VAL                (1U << _Q_LOCKED_OFFSET)
> +#define _Q_PENDING_VAL               (1U << _Q_PENDING_OFFSET)
>  
>  #endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */
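
If I did the math right, for reference the new layout works out to
(values implied by the definitions above, not part of the patch):

  _Q_LOCKED_OFFSET   = 0,  _Q_LOCKED_MASK   = 0x000000ff, _Q_LOCKED_VAL  = 0x00000001
  _Q_PENDING_OFFSET  = 8,  _Q_PENDING_MASK  = 0x00000100, _Q_PENDING_VAL = 0x00000100
  _Q_TAIL_IDX_OFFSET = 9,  _Q_TAIL_IDX_MASK = 0x00000600
  _Q_TAIL_CPU_OFFSET = 11, _Q_TAIL_CPU_MASK = 0xfffff800
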
> --- a/kernel/locking/qspinlock.c
> +++ b/kernel/locking/qspinlock.c
> @@ -83,24 +83,28 @@ static inline struct mcs_spinlock *decod
>       return per_cpu_ptr(&mcs_nodes[idx], cpu);
>  }
>  
> +#define _Q_LOCKED_PENDING_MASK       (_Q_LOCKED_MASK | _Q_PENDING_MASK)
> +
>  /**
>   * queue_spin_lock_slowpath - acquire the queue spinlock
>   * @lock: Pointer to queue spinlock structure
>   * @val: Current value of the queue spinlock 32-bit word
>   *
> - * (queue tail, lock bit)
> - *
> - *              fast      :    slow                                  :    unlock
> - *                        :                                          :
> - * uncontended  (0,0)   --:--> (0,1) --------------------------------:--> (*,0)
> - *                        :       | ^--------.                    /  :
> - *                        :       v           \                   |  :
> - * uncontended            :    (n,x) --+--> (n,0)                 |  :
> - *   queue                :       | ^--'                          |  :
> - *                        :       v                               |  :
> - * contended              :    (*,x) --+--> (*,0) -----> (*,1) ---'  :
> - *   queue                :         ^--'                             :
> + * (queue tail, pending bit, lock bit)
>   *
> + *              fast     :    slow                                  :    unlock
> + *                       :                                          :
> + * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
> + *                       :       | ^--------.------.             /  :
> + *                       :       v           \      \            |  :
> + * pending               :    (0,1,1) +--> (0,1,0)   \           |  :
> + *                       :       | ^--'              |           |  :
> + *                       :       v                   |           |  :
> + * uncontended           :    (n,x,y) +--> (n,0,0) --'           |  :
> + *   queue               :       | ^--'                          |  :
> + *                       :       v                               |  :
> + * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
> + *   queue               :         ^--'                             :
>   */
>  void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
>  {
> @@ -110,6 +114,65 @@ void queue_spin_lock_slowpath(struct qsp
>  
>       BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
>  
> +     /*
> +      * trylock || pending
> +      *
> +      * 0,0,0 -> 0,0,1 ; trylock
> +      * 0,0,1 -> 0,1,1 ; pending
> +      */
> +     for (;;) {
> +             /*
> +              * If we observe any contention; queue.
> +              */
> +             if (val & ~_Q_LOCKED_MASK)
> +                     goto queue;
> +
> +             new = _Q_LOCKED_VAL;
> +             if (val == new)
> +                     new |= _Q_PENDING_VAL;
> +
> +             old = atomic_cmpxchg(&lock->val, val, new);
> +             if (old == val)
> +                     break;
> +
> +             val = old;
> +     }
> +
> +     /*
> +      * we won the trylock
> +      */
> +     if (new == _Q_LOCKED_VAL)
> +             return;
> +
> +     /*
> +      * we're pending, wait for the owner to go away.
> +      *
> +      * *,1,1 -> *,1,0
> +      */
> +     while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
> +             cpu_relax();
> +
> +     /*
> +      * take ownership and clear the pending bit.
> +      *
> +      * *,1,0 -> *,0,1
> +      */
> +     for (;;) {
> +             new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
> +
> +             old = atomic_cmpxchg(&lock->val, val, new);
> +             if (old == val)
> +                     break;
> +
> +             val = old;
> +     }
> +     return;
> +
> +     /*
> +      * End of pending bit optimistic spinning and beginning of MCS
> +      * queuing.
> +      */
> +queue:
>       node = this_cpu_ptr(&mcs_nodes[0]);
>       idx = node->count++;
>       tail = encode_tail(smp_processor_id(), idx);
> @@ -119,15 +182,18 @@ void queue_spin_lock_slowpath(struct qsp
>       node->next = NULL;
>  
>       /*
> +      * we already touched the queueing cacheline; don't bother with pending
> +      * stuff.
> +      *
>        * trylock || xchg(lock, node)
>        *
> -      * 0,0 -> 0,1 ; trylock
> -      * p,x -> n,x ; prev = xchg(lock, node)
> +      * 0,0,0 -> 0,0,1 ; trylock
> +      * p,y,x -> n,y,x ; prev = xchg(lock, node)
>        */
>       for (;;) {
>               new = _Q_LOCKED_VAL;
>               if (val)
> -                     new = tail | (val & _Q_LOCKED_MASK);
> +                     new = tail | (val & _Q_LOCKED_PENDING_MASK);
>  
>               old = atomic_cmpxchg(&lock->val, val, new);
>               if (old == val)
> @@ -145,7 +211,7 @@ void queue_spin_lock_slowpath(struct qsp
>       /*
>        * if there was a previous node; link it and wait.
>        */
> -     if (old & ~_Q_LOCKED_MASK) {
> +     if (old & ~_Q_LOCKED_PENDING_MASK) {
>               prev = decode_tail(old);
>               ACCESS_ONCE(prev->next) = node;
>  
> @@ -153,18 +219,19 @@ void queue_spin_lock_slowpath(struct qsp
>       }
>  
>       /*
> -      * we're at the head of the waitqueue, wait for the owner to go away.
> +      * we're at the head of the waitqueue, wait for the owner & pending to
> +      * go away.
>        *
> -      * *,x -> *,0
> +      * *,x,y -> *,0,0
>        */
> -     while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
> +     while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)
>               cpu_relax();
>  
>       /*
>        * claim the lock:
>        *
> -      * n,0 -> 0,1 : lock, uncontended
> -      * *,0 -> *,1 : lock, contended
> +      * n,0,0 -> 0,0,1 : lock, uncontended
> +      * *,0,0 -> *,0,1 : lock, contended
>        */
>       for (;;) {
>               new = _Q_LOCKED_VAL;
> 
> 
