[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 1/2] make fls() and ffs() consistent across architectures



On 22/01/15 13:38, Jan Beulich wrote:
> Their parameter types differed between ARM and x86.
>
> Along with generalizing the functions this fixes
> - x86's non-long functions having long parameter types
> - ARM's ffs() using a long intermediate variable
> - generic_fls64() being broken when the upper half of the input is
>   non-zero
> - common (and in one case also ARM) code using fls() when flsl() was
>   meant
>
> Also drop ARM's constant_fls() in favor of the identical generic_fls().
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

The refactoring looks fine, so Reviewed-by: Andrew Cooper
<andrew.cooper3@xxxxxxxxxx>

As for the x86 asm itself, what about
http://lkml.kernel.org/r/5058741E020000780009C014@xxxxxxxxxxxxxxxxxxxx ?
It would appear that the same optimisation is relevant for Xens __ffs()

~Andrew

>
> --- unstable.orig/xen/common/page_alloc.c     2014-11-07 17:16:38.000000000 
> +0100
> +++ unstable/xen/common/page_alloc.c  2015-01-21 08:53:14.000000000 +0100
> @@ -278,7 +278,7 @@ unsigned long __init alloc_boot_pages(
>  
>  #define bits_to_zone(b) (((b) < (PAGE_SHIFT + 1)) ? 1 : ((b) - PAGE_SHIFT))
>  #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN :  \
> -                          (fls(page_to_mfn(pg)) ? : 1))
> +                          (flsl(page_to_mfn(pg)) ? : 1))
>  
>  typedef struct page_list_head 
> heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
>  static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
> @@ -1259,7 +1259,7 @@ void __init end_boot_allocator(void)
>      {
>  #ifdef CONFIG_X86
>          dma_bitsize = min_t(unsigned int,
> -                            fls(NODE_DATA(0)->node_spanned_pages) - 1
> +                            flsl(NODE_DATA(0)->node_spanned_pages) - 1
>                              + PAGE_SHIFT - 2,
>                              32);
>  #else
> @@ -1544,7 +1544,7 @@ static unsigned int __read_mostly xenhea
>  
>  void __init xenheap_max_mfn(unsigned long mfn)
>  {
> -    xenheap_bits = fls(mfn) + PAGE_SHIFT;
> +    xenheap_bits = flsl(mfn) + PAGE_SHIFT;
>  }
>  
>  void init_xenheap_pages(paddr_t ps, paddr_t pe)
> --- unstable.orig/xen/common/xmalloc_tlsf.c   2015-01-22 13:28:41.000000000 
> +0100
> +++ unstable/xen/common/xmalloc_tlsf.c        2015-01-21 08:53:05.000000000 
> +0100
> @@ -138,9 +138,9 @@ static inline void MAPPING_SEARCH(unsign
>      }
>      else
>      {
> -        t = (1 << (fls(*r) - 1 - MAX_LOG2_SLI)) - 1;
> +        t = (1 << (flsl(*r) - 1 - MAX_LOG2_SLI)) - 1;
>          *r = *r + t;
> -        *fl = fls(*r) - 1;
> +        *fl = flsl(*r) - 1;
>          *sl = (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
>          *fl -= FLI_OFFSET;
>          /*if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0!
> @@ -164,7 +164,7 @@ static inline void MAPPING_INSERT(unsign
>      }
>      else
>      {
> -        *fl = fls(r) - 1;
> +        *fl = flsl(r) - 1;
>          *sl = (r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
>          *fl -= FLI_OFFSET;
>      }
> --- unstable.orig/xen/include/asm-arm/arm32/bitops.h  2015-01-22 
> 13:28:41.000000000 +0100
> +++ unstable/xen/include/asm-arm/arm32/bitops.h       2015-01-21 
> 10:06:01.000000000 +0100
> @@ -15,6 +15,8 @@ extern int _test_and_change_bit(int nr, 
>  #define test_and_clear_bit(n,p)   _test_and_clear_bit(n,p)
>  #define test_and_change_bit(n,p)  _test_and_change_bit(n,p)
>  
> +#define flsl fls
> +
>  /*
>   * Little endian assembly bitops.  nr = 0 -> byte 0 bit 0.
>   */
> --- unstable.orig/xen/include/asm-arm/arm64/bitops.h  2015-01-22 
> 13:28:41.000000000 +0100
> +++ unstable/xen/include/asm-arm/arm64/bitops.h       2015-01-21 
> 08:28:43.000000000 +0100
> @@ -32,6 +32,17 @@ static /*__*/always_inline unsigned long
>   */
>  #define ffz(x)  __ffs(~(x))
>  
> +static inline int flsl(unsigned long x)
> +{
> +        int ret;
> +
> +        if (__builtin_constant_p(x))
> +               return generic_flsl(x);
> +
> +        asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
> +        return BITS_PER_LONG - ret;
> +}
> +
>  /* Based on linux/include/asm-generic/bitops/find.h */
>  
>  #ifndef find_next_bit
> --- unstable.orig/xen/include/asm-arm/bitops.h        2015-01-22 
> 13:28:41.000000000 +0100
> +++ unstable/xen/include/asm-arm/bitops.h     2015-01-22 13:30:17.000000000 
> +0100
> @@ -99,46 +99,17 @@ static inline int test_bit(int nr, const
>          return 1UL & (p[BIT_WORD(nr)] >> (nr & (BITS_PER_WORD-1)));
>  }
>  
> -static inline int constant_fls(int x)
> -{
> -        int r = 32;
> -
> -        if (!x)
> -                return 0;
> -        if (!(x & 0xffff0000u)) {
> -                x <<= 16;
> -                r -= 16;
> -        }
> -        if (!(x & 0xff000000u)) {
> -                x <<= 8;
> -                r -= 8;
> -        }
> -        if (!(x & 0xf0000000u)) {
> -                x <<= 4;
> -                r -= 4;
> -        }
> -        if (!(x & 0xc0000000u)) {
> -                x <<= 2;
> -                r -= 2;
> -        }
> -        if (!(x & 0x80000000u)) {
> -                x <<= 1;
> -                r -= 1;
> -        }
> -        return r;
> -}
> -
>  /*
>   * On ARMv5 and above those functions can be implemented around
>   * the clz instruction for much better code efficiency.
>   */
>  
> -static inline int fls(int x)
> +static inline int fls(unsigned int x)
>  {
>          int ret;
>  
>          if (__builtin_constant_p(x))
> -               return constant_fls(x);
> +               return generic_fls(x);
>  
>          asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
>          ret = BITS_PER_LONG - ret;
> @@ -146,7 +117,8 @@ static inline int fls(int x)
>  }
>  
>  
> -#define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); })
> +#define ffs(x) ({ unsigned int __t = (x); fls(__t & -__t); })
> +#define ffsl(x) ({ unsigned long __t = (x); flsl(__t & -__t); })
>  
>  /**
>   * find_first_set_bit - find the first set bit in @word
> @@ -157,7 +129,7 @@ static inline int fls(int x)
>   */
>  static inline unsigned int find_first_set_bit(unsigned long word)
>  {
> -        return ffs(word) - 1;
> +        return ffsl(word) - 1;
>  }
>  
>  /**
> --- unstable.orig/xen/include/asm-x86/bitops.h        2014-09-15 
> 15:42:35.000000000 +0200
> +++ unstable/xen/include/asm-x86/bitops.h     2015-01-22 13:30:02.000000000 
> +0100
> @@ -401,7 +401,7 @@ static inline unsigned int find_first_se
>   *
>   * This is defined the same way as the libc and compiler builtin ffs 
> routines.
>   */
> -static inline int ffs(unsigned long x)
> +static inline int ffsl(unsigned long x)
>  {
>      long r;
>  
> @@ -412,13 +412,24 @@ static inline int ffs(unsigned long x)
>      return (int)r+1;
>  }
>  
> +static inline int ffs(unsigned int x)
> +{
> +    int r;
> +
> +    asm ( "bsf %1,%0\n\t"
> +          "jnz 1f\n\t"
> +          "mov $-1,%0\n"
> +          "1:" : "=r" (r) : "rm" (x));
> +    return r + 1;
> +}
> +
>  /**
>   * fls - find last bit set
>   * @x: the word to search
>   *
>   * This is defined the same way as ffs.
>   */
> -static inline int fls(unsigned long x)
> +static inline int flsl(unsigned long x)
>  {
>      long r;
>  
> @@ -429,8 +440,16 @@ static inline int fls(unsigned long x)
>      return (int)r+1;
>  }
>  
> -#define ffs64 ffs
> -#define fls64 fls
> +static inline int fls(unsigned int x)
> +{
> +    int r;
> +
> +    asm ( "bsr %1,%0\n\t"
> +          "jnz 1f\n\t"
> +          "mov $-1,%0\n"
> +          "1:" : "=r" (r) : "rm" (x));
> +    return r + 1;
> +}
>  
>  /**
>   * hweightN - returns the hamming weight of a N-bit word
> --- unstable.orig/xen/include/xen/bitops.h    2015-01-22 13:28:41.000000000 
> +0100
> +++ unstable/xen/include/xen/bitops.h 2015-01-22 13:30:46.000000000 +0100
> @@ -70,20 +70,52 @@ static __inline__ int generic_fls(int x)
>      return r;
>  }
>  
> +#if BITS_PER_LONG == 64
> +
> +static inline int generic_ffsl(unsigned long x)
> +{
> +    return !x || (u32)x ? generic_ffs(x) : generic_ffs(x >> 32) + 32;
> +}
> +
> +static inline int generic_flsl(unsigned long x)
> +{
> +    u32 h = x >> 32;
> +
> +    return h ? generic_fls(h) + 32 : generic_fls(x);
> +}
> +
> +#else
> +# define generic_ffsl generic_ffs
> +# define generic_flsl generic_fls
> +#endif
> +
>  /*
>   * Include this here because some architectures need generic_ffs/fls in
>   * scope
>   */
>  #include <asm/bitops.h>
>  
> -
> +#if BITS_PER_LONG == 64
> +# define fls64 flsl
> +# define ffs64 ffsl
> +#else
> +# ifndef ffs64
> +static inline int generic_ffs64(__u64 x)
> +{
> +    return !x || (__u32)x ? ffs(x) : ffs(x >> 32) + 32;
> +}
> +#  define ffs64 generic_ffs64
> +# endif
> +# ifndef fls64
>  static inline int generic_fls64(__u64 x)
>  {
>      __u32 h = x >> 32;
> -    if (h)
> -        return fls(x) + 32;
> -    return fls(x);
> +
> +    return h ? fls(h) + 32 : fls(x);
>  }
> +#  define fls64 generic_fls64
> +# endif
> +#endif
>  
>  static __inline__ int get_bitmask_order(unsigned int count)
>  {
>
>



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.