
Re: [Xen-devel] [PATCH] MCE: Fix race condition in mctelem_reserve



This logic (mctelem) is related to the dom0 mcelog logic. Have you tested
whether mcelog still works correctly with your patch?

Thanks,
Jinsong

Frediano Ziglio wrote:
> These lines (in mctelem_reserve)
> 
> 
>         newhead = oldhead->mcte_next;
>         if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
> 
> are racy. After you read the newhead pointer, another flow (thread or
> recursive invocation) can change the whole list yet end up setting the
> head back to the same value. oldhead then still equals *freelp, but the
> newhead you install may point to any element, even one that is already
> in use.
> 
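For illustration only (this is not Xen code): a minimal standalone C sketch
of the race described above, using C11 atomics in place of Xen's
cmpxchgptr(). The names node, free_head and reserve() are invented for the
example.

    #include <stdatomic.h>
    #include <stddef.h>

    struct node {
            struct node *next;
    };

    /* Head of a lock-free singly-linked free list. */
    static _Atomic(struct node *) free_head;

    struct node *reserve(void)
    {
            struct node *oldhead, *newhead;

            for (;;) {
                    oldhead = atomic_load(&free_head);
                    if (oldhead == NULL)
                            return NULL;
                    newhead = oldhead->next;
                    /*
                     * Window: another CPU can pop oldhead (plus further
                     * nodes), start using them, then free oldhead back so
                     * that free_head equals oldhead again.  The compare-
                     * and-swap below still succeeds, but newhead is stale
                     * and may point at an element that is now in use.
                     */
                    if (atomic_compare_exchange_strong(&free_head, &oldhead,
                                                       newhead))
                            return oldhead;
            }
    }
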
> This patch instead uses a bit array and atomic bit operations.
> 
> It uses unsigned long rather than a bitmap type because testing for
> all zeroes is easier that way.
> 
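For illustration, a simplified standalone sketch (not the patch itself) of
the same idea: one unsigned long holds a free bit per element,
find-first-set picks a candidate, and an atomic test-and-clear decides the
winner, so two concurrent reservers can never obtain the same entry. GCC
builtins stand in for Xen's find_first_set_bit()/test_and_clear_bit();
NENT, free_mask, reserve_entry() and free_entry() are invented names.

    #define NENT 30    /* e.g. MC_URGENT_NENT + MC_NONURGENT_NENT */

    /* One bit per element; a set bit means the element is free. */
    static unsigned long free_mask = (1UL << NENT) - 1;

    /* Return a free element index, or -1 if none are left. */
    static int reserve_entry(void)
    {
            unsigned long old, mask;
            int bit;

            for (;;) {
                    old = __atomic_load_n(&free_mask, __ATOMIC_RELAXED);
                    if (old == 0)
                            return -1;              /* "empty" is a plain zero test */
                    bit = __builtin_ctzl(old);      /* lowest free bit */
                    mask = 1UL << bit;
                    /* Atomically clear the bit; only the caller that saw
                     * it set in the returned old value owns the element. */
                    if (__atomic_fetch_and(&free_mask, ~mask,
                                           __ATOMIC_ACQ_REL) & mask)
                            return bit;
            }
    }

    /* Releasing an element is a single atomic bit set. */
    static void free_entry(int bit)
    {
            __atomic_fetch_or(&free_mask, 1UL << bit, __ATOMIC_RELEASE);
    }
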
> Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx>
> ---
>  xen/arch/x86/cpu/mcheck/mctelem.c |   52 ++++++++++++++++++++++---------------
>  1 file changed, 31 insertions(+), 21 deletions(-)
> 
> diff --git a/xen/arch/x86/cpu/mcheck/mctelem.c b/xen/arch/x86/cpu/mcheck/mctelem.c
> index 895ce1a..e56b6fb 100644
> --- a/xen/arch/x86/cpu/mcheck/mctelem.c
> +++ b/xen/arch/x86/cpu/mcheck/mctelem.c
> @@ -69,6 +69,11 @@ struct mctelem_ent {
>  #define      MC_URGENT_NENT          10
>  #define      MC_NONURGENT_NENT       20
> 
> +/* Check if we can fit enough bits in the free bit array */
> +#if MC_URGENT_NENT + MC_NONURGENT_NENT > BITS_PER_LONG
> +#error Too many elements
> +#endif
> +
>  #define      MC_NCLASSES             (MC_NONURGENT + 1)
> 
>  #define      COOKIE2MCTE(c)          ((struct mctelem_ent *)(c))
> @@ -77,11 +82,9 @@ struct mctelem_ent {
>  static struct mc_telem_ctl {
>       /* Linked lists that thread the array members together.
>        *
> -      * The free lists are singly-linked via mcte_next, and we allocate
> -      * from them by atomically unlinking an element from the head.
> -      * Consumed entries are returned to the head of the free list.
> -      * When an entry is reserved off the free list it is not linked
> -      * on any list until it is committed or dismissed.
> +      * The free list is a bit array where a set bit means free.
> +      * This works because the number of elements is small, and it
> +      * makes atomic allocation easy.
>        *
>        * The committed list grows at the head and we do not maintain a
>        * tail pointer; insertions are performed atomically.  The head
> @@ -101,7 +104,7 @@ static struct mc_telem_ctl {
>        * we can lock it for updates.  The head of the processing list
>        * always has the oldest telemetry, and we append (as above)
>        * at the tail of the processing list. */
> -     struct mctelem_ent *mctc_free[MC_NCLASSES];
> +     unsigned long mctc_free[MC_NCLASSES];
>       struct mctelem_ent *mctc_committed[MC_NCLASSES];
>       struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
>       struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
> @@ -214,7 +217,10 @@ static void mctelem_free(struct mctelem_ent *tep)
>       BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE);
> 
>       tep->mcte_prev = NULL;
> -     mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep);
> +     tep->mcte_next = NULL;
> +
> +     /* set free in array */
> +     set_bit(tep - mctctl.mctc_elems, &mctctl.mctc_free[target]);
>  }
> 
>  /* Increment the reference count of an entry that is not linked on to
> @@ -284,7 +290,7 @@ void mctelem_init(int reqdatasz)
>       }
> 
>       for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
> -             struct mctelem_ent *tep, **tepp;
> +             struct mctelem_ent *tep;
> 
>               tep = mctctl.mctc_elems + i;
>               tep->mcte_flags = MCTE_F_STATE_FREE;
> @@ -292,16 +298,15 @@ void mctelem_init(int reqdatasz)
>               tep->mcte_data = datarr + i * datasz;
> 
>               if (i < MC_URGENT_NENT) {
> -                     tepp = &mctctl.mctc_free[MC_URGENT];
> -                     tep->mcte_flags |= MCTE_F_HOME_URGENT;
> +                     __set_bit(i, &mctctl.mctc_free[MC_URGENT]);
> +                     tep->mcte_flags = MCTE_F_HOME_URGENT;
>               } else {
> -                     tepp = &mctctl.mctc_free[MC_NONURGENT];
> -                     tep->mcte_flags |= MCTE_F_HOME_NONURGENT;
> +                     __set_bit(i, &mctctl.mctc_free[MC_NONURGENT]);
> +                     tep->mcte_flags = MCTE_F_HOME_NONURGENT;
>               }
> 
> -             tep->mcte_next = *tepp;
> +             tep->mcte_next = NULL;
>               tep->mcte_prev = NULL;
> -             *tepp = tep;
>       }
>  }
> 
> @@ -310,18 +315,21 @@ static int mctelem_drop_count;
> 
>  /* Reserve a telemetry entry, or return NULL if none available.
>   * If we return an entry then the caller must subsequently call exactly one of
> - * mctelem_unreserve or mctelem_commit for that entry.
> + * mctelem_dismiss or mctelem_commit for that entry.
>   */
>  mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
>  {
> -     struct mctelem_ent **freelp;
> -     struct mctelem_ent *oldhead, *newhead;
> +     unsigned long *freelp;
> +     unsigned long oldfree;
> +     unsigned bit;
>       mctelem_class_t target = (which == MC_URGENT) ?
>           MC_URGENT : MC_NONURGENT;
> 
>       freelp = &mctctl.mctc_free[target];
>       for (;;) {
> -             if ((oldhead = *freelp) == NULL) {
> +             oldfree = *freelp;
> +
> +             if (oldfree == 0) {
>                       if (which == MC_URGENT && target == MC_URGENT) {
>                               /* raid the non-urgent freelist */
>                               target = MC_NONURGENT;
> @@ -333,9 +341,11 @@ mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
>                       }
>               }
> 
> -             newhead = oldhead->mcte_next;
> -             if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
> -                     struct mctelem_ent *tep = oldhead;
> +             /* try to allocate, atomically clear free bit */
> +             bit = find_first_set_bit(oldfree);
> +             if (test_and_clear_bit(bit, freelp)) {
> +                     /* return element we got */
> +                     struct mctelem_ent *tep = mctctl.mctc_elems + bit;
> 
>                       mctelem_hold(tep);
>                       MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED);


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
