[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 06/13] xen/bitops: Implement ffs() in common logic



On Fri, 24 May 2024, Andrew Cooper wrote:
> Perform constant-folding unconditionally, rather than having it implemented
> inconsistency between architectures.
> 
> Confirm the expected behaviour with compile time and boot time tests.
> 
> For non-constant inputs, use arch_ffs() if provided but fall back to
> generic_ffsl() if not.  In particular, RISC-V doesn't have a builtin that
> works in all configurations.
> 
> For x86, rename ffs() to arch_ffs() and adjust the prototype.
> 
> For PPC, __builtin_ctz() is 1/3 of the size of size of the transform to
> generic_fls().  Drop the definition entirely.  ARM too benefits in the general
> case by using __builtin_ctz(), but less dramatically because it using
> optimised asm().
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

This patch made me realize that we should add __builtin_ctz,
__builtin_constant_p and always_inline to
docs/misra/C-language-toolchain.rst as they don't seem to be currently
documented and they are not part of the C standard

Patch welcome :-)

Reviewed-by: Stefano Stabellini <sstabellini@xxxxxxxxxx>


> ---
> CC: Jan Beulich <JBeulich@xxxxxxxx>
> CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> CC: Wei Liu <wl@xxxxxxx>
> CC: Stefano Stabellini <sstabellini@xxxxxxxxxx>
> CC: Julien Grall <julien@xxxxxxx>
> CC: Volodymyr Babchuk <Volodymyr_Babchuk@xxxxxxxx>
> CC: Bertrand Marquis <bertrand.marquis@xxxxxxx>
> CC: Michal Orzel <michal.orzel@xxxxxxx>
> CC: Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx>
> CC: Shawn Anastasio <sanastasio@xxxxxxxxxxxxxxxxxxxxx>
> CC: consulting@xxxxxxxxxxx <consulting@xxxxxxxxxxx>
> CC: Simone Ballarin <simone.ballarin@xxxxxxxxxxx>
> CC: Federico Serafini <federico.serafini@xxxxxxxxxxx>
> CC: Nicola Vetrini <nicola.vetrini@xxxxxxxxxxx>
> 
> v2:
>  * Fall back to generic, not builtin.
>  * Extend the testing with multi-bit values.
>  * Use always_inline for x86
>  * Defer x86 optimisation to a later change
> ---
>  xen/arch/arm/include/asm/bitops.h |  2 +-
>  xen/arch/ppc/include/asm/bitops.h |  2 +-
>  xen/arch/x86/include/asm/bitops.h |  3 ++-
>  xen/common/Makefile               |  1 +
>  xen/common/bitops.c               | 19 +++++++++++++++++++
>  xen/include/xen/bitops.h          | 17 +++++++++++++++++
>  6 files changed, 41 insertions(+), 3 deletions(-)
>  create mode 100644 xen/common/bitops.c
> 
> diff --git a/xen/arch/arm/include/asm/bitops.h 
> b/xen/arch/arm/include/asm/bitops.h
> index ec1cf7b9b323..a88ec2612e16 100644
> --- a/xen/arch/arm/include/asm/bitops.h
> +++ b/xen/arch/arm/include/asm/bitops.h
> @@ -157,7 +157,7 @@ static inline int fls(unsigned int x)
>  }
>  
>  
> -#define ffs(x) ({ unsigned int __t = (x); fls(ISOLATE_LSB(__t)); })
> +#define arch_ffs(x)  ((x) ? 1 + __builtin_ctz(x) : 0)
>  #define ffsl(x) ({ unsigned long __t = (x); flsl(ISOLATE_LSB(__t)); })
>  
>  /**
> diff --git a/xen/arch/ppc/include/asm/bitops.h 
> b/xen/arch/ppc/include/asm/bitops.h
> index ab692d01717b..5c36a6cc0ce3 100644
> --- a/xen/arch/ppc/include/asm/bitops.h
> +++ b/xen/arch/ppc/include/asm/bitops.h
> @@ -173,7 +173,7 @@ static inline int __test_and_clear_bit(int nr, volatile 
> void *addr)
>  
>  #define flsl(x) generic_flsl(x)
>  #define fls(x) generic_flsl(x)
> -#define ffs(x) ({ unsigned int t_ = (x); fls(t_ & -t_); })
> +#define arch_ffs(x)  ((x) ? 1 + __builtin_ctz(x) : 0)
>  #define ffsl(x) ({ unsigned long t_ = (x); flsl(t_ & -t_); })
>  
>  /**
> diff --git a/xen/arch/x86/include/asm/bitops.h 
> b/xen/arch/x86/include/asm/bitops.h
> index 5a71afbc89d5..122767fc0d10 100644
> --- a/xen/arch/x86/include/asm/bitops.h
> +++ b/xen/arch/x86/include/asm/bitops.h
> @@ -430,7 +430,7 @@ static inline int ffsl(unsigned long x)
>      return (int)r+1;
>  }
>  
> -static inline int ffs(unsigned int x)
> +static always_inline unsigned int arch_ffs(unsigned int x)
>  {
>      int r;
>  
> @@ -440,6 +440,7 @@ static inline int ffs(unsigned int x)
>            "1:" : "=r" (r) : "rm" (x));
>      return r + 1;
>  }
> +#define arch_ffs arch_ffs
>  
>  /**
>   * fls - find last bit set
> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index d512cad5243f..21a4fb4c7166 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -1,5 +1,6 @@
>  obj-$(CONFIG_ARGO) += argo.o
>  obj-y += bitmap.o
> +obj-bin-$(CONFIG_DEBUG) += bitops.init.o
>  obj-$(CONFIG_GENERIC_BUG_FRAME) += bug.o
>  obj-$(CONFIG_HYPFS_CONFIG) += config_data.o
>  obj-$(CONFIG_CORE_PARKING) += core_parking.o
> diff --git a/xen/common/bitops.c b/xen/common/bitops.c
> new file mode 100644
> index 000000000000..8c161b8ea7fa
> --- /dev/null
> +++ b/xen/common/bitops.c
> @@ -0,0 +1,19 @@
> +#include <xen/bitops.h>
> +#include <xen/boot-check.h>
> +#include <xen/init.h>
> +
> +static void __init test_ffs(void)
> +{
> +    /* unsigned int ffs(unsigned int) */
> +    CHECK(ffs, 0, 0);
> +    CHECK(ffs, 1, 1);
> +    CHECK(ffs, 3, 1);
> +    CHECK(ffs, 7, 1);
> +    CHECK(ffs, 6, 2);
> +    CHECK(ffs, 0x80000000U, 32);
> +}
> +
> +static void __init __constructor test_bitops(void)
> +{
> +    test_ffs();
> +}
> diff --git a/xen/include/xen/bitops.h b/xen/include/xen/bitops.h
> index cd405df96180..f7e90a2893a5 100644
> --- a/xen/include/xen/bitops.h
> +++ b/xen/include/xen/bitops.h
> @@ -31,6 +31,23 @@ unsigned int __pure generic_flsl(unsigned long x);
>  
>  #include <asm/bitops.h>
>  
> +/*
> + * Find First/Last Set bit (all forms).
> + *
> + * Bits are labelled from 1.  Returns 0 if given 0.
> + */
> +static always_inline __pure unsigned int ffs(unsigned int x)
> +{
> +    if ( __builtin_constant_p(x) )
> +        return __builtin_ffs(x);
> +
> +#ifdef arch_ffs
> +    return arch_ffs(x);
> +#else
> +    return generic_ffsl(x);
> +#endif
> +}
> +
>  /* --------------------- Please tidy below here --------------------- */
>  
>  #ifndef find_next_bit
> -- 
> 2.30.2
> 

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.