[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v2 11/13] xen/bitops: Implement fls()/flsl() in common logic
From: Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx> This is most easily done together because of how arm32 is currently structured, but it does just mirror the existing ffs()/ffsl() work. Introduce compile and boot time testing. Signed-off-by: Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- CC: Jan Beulich <JBeulich@xxxxxxxx> CC: Roger Pau Monné <roger.pau@xxxxxxxxxx> CC: Wei Liu <wl@xxxxxxx> CC: Stefano Stabellini <sstabellini@xxxxxxxxxx> CC: Julien Grall <julien@xxxxxxx> CC: Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx> CC: Bertrand Marquis <bertrand.marquis@xxxxxxx> CC: Michal Orzel <michal.orzel@xxxxxxx> CC: Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx> CC: Shawn Anastasio <sanastasio@xxxxxxxxxxxxxxxxxxxxx> CC: consulting@xxxxxxxxxxx <consulting@xxxxxxxxxxx> CC: Simone Ballarin <simone.ballarin@xxxxxxxxxxx> CC: Federico Serafini <federico.serafini@xxxxxxxxxxx> CC: Nicola Vetrini <nicola.vetrini@xxxxxxxxxxx> v2: * New, incorporated from Oleksii's RISC-V series and adjusted. for x86: add/remove: 0/0 grow/shrink: 3/17 up/down: 28/-153 (-125) Function old new delta pci_enable_msi 1033 1049 +16 vlapic_lowest_prio 330 338 +8 kexec_early_calculations 53 57 +4 pci_restore_msi_state 1159 1157 -2 arch_hwdom_irqs 61 59 -2 control_read 132 129 -3 pci_enable_msi.cold 121 117 -4 arch_get_dma_bitsize 173 169 -4 xmem_pool_alloc 1039 1032 -7 xenheap_max_mfn 49 42 -7 mba_sanitize_thrtl 83 76 -7 xstate_init 807 799 -8 offline_page 965 957 -8 apicid_to_socket 160 152 -8 vlapic_find_highest_vector 61 48 -13 xmem_pool_free 983 967 -16 iommu_alloc 935 919 -16 free_heap_pages 1512 1496 -16 detect_ht 318 302 -16 alloc_heap_pages 1569 1553 -16 showing that the optimiser can now do a better job in most cases. 
--- xen/arch/arm/include/asm/arm32/bitops.h | 2 -- xen/arch/arm/include/asm/arm64/bitops.h | 12 ------- xen/arch/arm/include/asm/bitops.h | 19 ++-------- xen/arch/ppc/include/asm/bitops.h | 4 +-- xen/arch/x86/include/asm/bitops.h | 46 +++++++++++++++---------- xen/common/bitops.c | 25 ++++++++++++++ xen/include/xen/bitops.h | 24 +++++++++++++ 7 files changed, 80 insertions(+), 52 deletions(-) diff --git a/xen/arch/arm/include/asm/arm32/bitops.h b/xen/arch/arm/include/asm/arm32/bitops.h index d0309d47c188..0d7bb12d5c19 100644 --- a/xen/arch/arm/include/asm/arm32/bitops.h +++ b/xen/arch/arm/include/asm/arm32/bitops.h @@ -1,8 +1,6 @@ #ifndef _ARM_ARM32_BITOPS_H #define _ARM_ARM32_BITOPS_H -#define flsl fls - /* * Little endian assembly bitops. nr = 0 -> byte 0 bit 0. */ diff --git a/xen/arch/arm/include/asm/arm64/bitops.h b/xen/arch/arm/include/asm/arm64/bitops.h index 906d84e5f295..a6135838dcfa 100644 --- a/xen/arch/arm/include/asm/arm64/bitops.h +++ b/xen/arch/arm/include/asm/arm64/bitops.h @@ -1,18 +1,6 @@ #ifndef _ARM_ARM64_BITOPS_H #define _ARM_ARM64_BITOPS_H -static inline int flsl(unsigned long x) -{ - uint64_t ret; - - if (__builtin_constant_p(x)) - return generic_flsl(x); - - asm("clz\t%0, %1" : "=r" (ret) : "r" (x)); - - return BITS_PER_LONG - ret; -} - /* Based on linux/include/asm-generic/bitops/find.h */ #ifndef CONFIG_GENERIC_FIND_FIRST_BIT diff --git a/xen/arch/arm/include/asm/bitops.h b/xen/arch/arm/include/asm/bitops.h index d30ba44598e3..8f4bdc09d128 100644 --- a/xen/arch/arm/include/asm/bitops.h +++ b/xen/arch/arm/include/asm/bitops.h @@ -140,25 +140,10 @@ static inline int test_bit(int nr, const volatile void *addr) return 1UL & (p[BITOP_WORD(nr)] >> (nr & (BITOP_BITS_PER_WORD-1))); } -/* - * On ARMv5 and above those functions can be implemented around - * the clz instruction for much better code efficiency. 
- */ - -static inline int fls(unsigned int x) -{ - int ret; - - if (__builtin_constant_p(x)) - return generic_flsl(x); - - asm("clz\t%"__OP32"0, %"__OP32"1" : "=r" (ret) : "r" (x)); - return 32 - ret; -} - - #define arch_ffs(x) ((x) ? 1 + __builtin_ctz(x) : 0) #define arch_ffsl(x) ((x) ? 1 + __builtin_ctzl(x) : 0) +#define arch_fls(x) ((x) ? 32 - __builtin_clz(x) : 0) +#define arch_flsl(x) ((x) ? BITS_PER_LONG - __builtin_clzl(x) : 0) /** * hweightN - returns the hamming weight of a N-bit word diff --git a/xen/arch/ppc/include/asm/bitops.h b/xen/arch/ppc/include/asm/bitops.h index 761361291e6f..8119b5ace877 100644 --- a/xen/arch/ppc/include/asm/bitops.h +++ b/xen/arch/ppc/include/asm/bitops.h @@ -171,10 +171,10 @@ static inline int __test_and_clear_bit(int nr, volatile void *addr) return (old & mask) != 0; } -#define flsl(x) generic_flsl(x) -#define fls(x) generic_flsl(x) #define arch_ffs(x) ((x) ? 1 + __builtin_ctz(x) : 0) #define arch_ffsl(x) ((x) ? 1 + __builtin_ctzl(x) : 0) +#define arch_fls(x) ((x) ? 32 - __builtin_clz(x) : 0) +#define arch_flsl(x) ((x) ? BITS_PER_LONG - __builtin_clzl(x) : 0) /** * hweightN - returns the hamming weight of a N-bit word diff --git a/xen/arch/x86/include/asm/bitops.h b/xen/arch/x86/include/asm/bitops.h index 830e488f33a0..fc9fe73ad5ba 100644 --- a/xen/arch/x86/include/asm/bitops.h +++ b/xen/arch/x86/include/asm/bitops.h @@ -447,33 +447,41 @@ static always_inline unsigned int arch_ffsl(unsigned long x) } #define arch_ffsl arch_ffsl -/** - * fls - find last bit set - * @x: the word to search - * - * This is defined the same way as ffs. - */ -static inline int flsl(unsigned long x) +static always_inline unsigned int arch_fls(unsigned int x) { - long r; + unsigned int r; + + /* See arch_ffs() for safety discussions. 
*/ + if ( __builtin_constant_p(x > 0) && x > 0 ) + asm ( "bsr %[val], %[res]" + : [res] "=r" (r) + : [val] "rm" (x) ); + else + asm ( "bsr %[val], %[res]" + : [res] "=r" (r) + : [val] "rm" (x), "[res]" (-1) ); - asm ( "bsr %1,%0\n\t" - "jnz 1f\n\t" - "mov $-1,%0\n" - "1:" : "=r" (r) : "rm" (x)); - return (int)r+1; + return r + 1; } +#define arch_fls arch_fls -static inline int fls(unsigned int x) +static always_inline unsigned int arch_flsl(unsigned long x) { - int r; + unsigned int r; + + /* See arch_ffs() for safety discussions. */ + if ( __builtin_constant_p(x > 0) && x > 0 ) + asm ( "bsr %[val], %q[res]" + : [res] "=r" (r) + : [val] "rm" (x) ); + else + asm ( "bsr %[val], %q[res]" + : [res] "=r" (r) + : [val] "rm" (x), "[res]" (-1) ); - asm ( "bsr %1,%0\n\t" - "jnz 1f\n\t" - "mov $-1,%0\n" - "1:" : "=r" (r) : "rm" (x)); return r + 1; } +#define arch_flsl arch_flsl /** * hweightN - returns the hamming weight of a N-bit word diff --git a/xen/common/bitops.c b/xen/common/bitops.c index b3813f818198..b4845d9e84d1 100644 --- a/xen/common/bitops.c +++ b/xen/common/bitops.c @@ -26,7 +26,32 @@ static void __init test_ffs(void) #endif } +static void __init test_fls(void) +{ + /* unsigned int fls(unsigned int) */ + CHECK(fls, 0, 0); + CHECK(fls, 1, 1); + CHECK(fls, 3, 2); + CHECK(fls, 7, 3); + CHECK(fls, 6, 3); + CHECK(fls, 0x80000000U, 32); + + /* unsigned int flsl(unsigned long) */ + CHECK(flsl, 0, 0); + CHECK(flsl, 1, 1); + CHECK(flsl, 3, 2); + CHECK(flsl, 7, 3); + CHECK(flsl, 6, 3); + + CHECK(flsl, 1 | (1UL << (BITS_PER_LONG - 1)), BITS_PER_LONG); +#if BITS_PER_LONG > 32 + CHECK(flsl, 1 | (1UL << 32), 33); + CHECK(flsl, 1 | (1UL << 63), 64); +#endif +} + static void __init __constructor test_bitops(void) { test_ffs(); + test_fls(); } diff --git a/xen/include/xen/bitops.h b/xen/include/xen/bitops.h index 88cf27a88bcf..e7df6377372d 100644 --- a/xen/include/xen/bitops.h +++ b/xen/include/xen/bitops.h @@ -60,6 +60,30 @@ static always_inline __pure unsigned int 
ffsl(unsigned long x) #endif } +static always_inline __pure unsigned int fls(unsigned int x) +{ + if ( __builtin_constant_p(x) ) + return x ? 32 - __builtin_clz(x) : 0; + +#ifdef arch_fls + return arch_fls(x); +#else + return generic_flsl(x); +#endif +} + +static always_inline __pure unsigned int flsl(unsigned long x) +{ + if ( __builtin_constant_p(x) ) + return x ? BITS_PER_LONG - __builtin_clzl(x) : 0; + +#ifdef arch_fls + return arch_flsl(x); +#else + return generic_flsl(x); +#endif +} + /* --------------------- Please tidy below here --------------------- */ #ifndef find_next_bit -- 2.30.2
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |