|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 2/2] xen: arm: update arm32 assembly primitives to Linux v3.16-rc6
Hi Ian,
On 07/25/2014 04:22 PM, Ian Campbell wrote:
> bitops, cmpxchg, atomics: Import:
> c32ffce ARM: 7984/1: prefetch: add prefetchw invocations for barriered
> atomics
Compared to Linux, we don't have specific prefetch* helpers; we directly
use the compiler built-in ones. Shouldn't we import the ARM-specific
helpers to gain performance?
Regards,
> Author: Will Deacon <will.deacon@xxxxxxx>
> Signed-off-by: Will Deacon <will.deacon@xxxxxxx>
> Signed-off-by: Russell King <rmk+kernel@xxxxxxxxxxxxxxxx>
>
> atomics: In addition to the above import:
> db38ee8 ARM: 7983/1: atomics: implement a better __atomic_add_unless for v6+
> Author: Will Deacon <will.deacon@xxxxxxx>
> Signed-off-by: Will Deacon <will.deacon@xxxxxxx>
> Signed-off-by: Russell King <rmk+kernel@xxxxxxxxxxxxxxxx>
>
> spinlocks: We have diverged from Linux, so no updates but note this in the
> README.
>
> mem* and str*: Import:
> d98b90e ARM: 7990/1: asm: rename logical shift macros push pull into lspush
> lspull
> Author: Victor Kamensky <victor.kamensky@xxxxxxxxxx>
> Suggested-by: Will Deacon <will.deacon@xxxxxxx>
> Signed-off-by: Victor Kamensky <victor.kamensky@xxxxxxxxxx>
> Acked-by: Nicolas Pitre <nico@xxxxxxxxxx>
> Signed-off-by: Russell King <rmk+kernel@xxxxxxxxxxxxxxxx>
>
> For some reason str* were mentioned under mem* in the README, fix.
>
> libgcc: No changes, update baseline
>
> Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> ---
> xen/arch/arm/README.LinuxPrimitives | 17 +++++++--------
> xen/arch/arm/arm32/lib/assembler.h | 8 +++----
> xen/arch/arm/arm32/lib/bitops.h | 5 +++++
> xen/arch/arm/arm32/lib/copy_template.S | 36
> ++++++++++++++++----------------
> xen/arch/arm/arm32/lib/memmove.S | 36
> ++++++++++++++++----------------
> xen/include/asm-arm/arm32/atomic.h | 32 ++++++++++++++++++++++++++++
> xen/include/asm-arm/arm32/cmpxchg.h | 5 +++++
> 7 files changed, 90 insertions(+), 49 deletions(-)
>
> diff --git a/xen/arch/arm/README.LinuxPrimitives
> b/xen/arch/arm/README.LinuxPrimitives
> index 69eeb70..7e15b04 100644
> --- a/xen/arch/arm/README.LinuxPrimitives
> +++ b/xen/arch/arm/README.LinuxPrimitives
> @@ -65,7 +65,7 @@ linux/arch/arm64/lib/copy_page.S unused in Xen
> arm32
> =====================================================================
>
> -bitops: last sync @ v3.14-rc7 (last commit: b7ec699)
> +bitops: last sync @ v3.16-rc6 (last commit: c32ffce0f66e)
>
> linux/arch/arm/lib/bitops.h xen/arch/arm/arm32/lib/bitops.h
> linux/arch/arm/lib/changebit.S xen/arch/arm/arm32/lib/changebit.S
> @@ -83,13 +83,13 @@ done
>
> ---------------------------------------------------------------------
>
> -cmpxchg: last sync @ v3.14-rc7 (last commit: 775ebcc)
> +cmpxchg: last sync @ v3.16-rc6 (last commit: c32ffce0f66e)
>
> linux/arch/arm/include/asm/cmpxchg.h xen/include/asm-arm/arm32/cmpxchg.h
>
> ---------------------------------------------------------------------
>
> -atomics: last sync @ v3.14-rc7 (last commit: aed3a4e)
> +atomics: last sync @ v3.16-rc6 (last commit: 030d0178bdbd)
>
> linux/arch/arm/include/asm/atomic.h xen/include/asm-arm/arm32/atomic.h
>
> @@ -99,6 +99,8 @@ spinlocks: last sync: 15e7e5c1ebf5
>
> linux/arch/arm/include/asm/spinlock.h xen/include/asm-arm/arm32/spinlock.h
>
> +*** Linux has switched to ticket locks but we still use bitlocks.
> +
> resync to v3.14-rc7:
>
> 7c8746a ARM: 7955/1: spinlock: ensure we have a compiler barrier before sev
> @@ -111,7 +113,7 @@ resync to v3.14-rc7:
>
> ---------------------------------------------------------------------
>
> -mem*: last sync @ v3.14-rc7 (last commit: 418df63a)
> +mem*: last sync @ v3.16-rc6 (last commit: d98b90ea22b0)
>
> linux/arch/arm/lib/copy_template.S
> xen/arch/arm/arm32/lib/copy_template.S
> linux/arch/arm/lib/memchr.S xen/arch/arm/arm32/lib/memchr.S
> @@ -120,9 +122,6 @@ linux/arch/arm/lib/memmove.S
> xen/arch/arm/arm32/lib/memmove.S
> linux/arch/arm/lib/memset.S xen/arch/arm/arm32/lib/memset.S
> linux/arch/arm/lib/memzero.S xen/arch/arm/arm32/lib/memzero.S
>
> -linux/arch/arm/lib/strchr.S xen/arch/arm/arm32/lib/strchr.S
> -linux/arch/arm/lib/strrchr.S xen/arch/arm/arm32/lib/strrchr.S
> -
> for i in copy_template.S memchr.S memcpy.S memmove.S memset.S \
> memzero.S ; do
> diff -u linux/arch/arm/lib/$i xen/arch/arm/arm32/lib/$i
> @@ -130,7 +129,7 @@ done
>
> ---------------------------------------------------------------------
>
> -str*: last sync @ v3.13-rc7 (last commit: 93ed397)
> +str*: last sync @ v3.16-rc6 (last commit: d98b90ea22b0)
>
> linux/arch/arm/lib/strchr.S xen/arch/arm/arm32/lib/strchr.S
> linux/arch/arm/lib/strrchr.S xen/arch/arm/arm32/lib/strrchr.S
> @@ -145,7 +144,7 @@ clear_page == memset
>
> ---------------------------------------------------------------------
>
> -libgcc: last sync @ v3.14-rc7 (last commit: 01885bc)
> +libgcc: last sync @ v3.16-rc6 (last commit: 01885bc)
>
> linux/arch/arm/lib/lib1funcs.S xen/arch/arm/arm32/lib/lib1funcs.S
> linux/arch/arm/lib/lshrdi3.S xen/arch/arm/arm32/lib/lshrdi3.S
> diff --git a/xen/arch/arm/arm32/lib/assembler.h
> b/xen/arch/arm/arm32/lib/assembler.h
> index f8d4b3a..6de2638 100644
> --- a/xen/arch/arm/arm32/lib/assembler.h
> +++ b/xen/arch/arm/arm32/lib/assembler.h
> @@ -36,8 +36,8 @@
> * Endian independent macros for shifting bytes within registers.
> */
> #ifndef __ARMEB__
> -#define pull lsr
> -#define push lsl
> +#define lspull lsr
> +#define lspush lsl
> #define get_byte_0 lsl #0
> #define get_byte_1 lsr #8
> #define get_byte_2 lsr #16
> @@ -47,8 +47,8 @@
> #define put_byte_2 lsl #16
> #define put_byte_3 lsl #24
> #else
> -#define pull lsl
> -#define push lsr
> +#define lspull lsl
> +#define lspush lsr
> #define get_byte_0 lsr #24
> #define get_byte_1 lsr #16
> #define get_byte_2 lsr #8
> diff --git a/xen/arch/arm/arm32/lib/bitops.h b/xen/arch/arm/arm32/lib/bitops.h
> index 25784c3..a167c2d 100644
> --- a/xen/arch/arm/arm32/lib/bitops.h
> +++ b/xen/arch/arm/arm32/lib/bitops.h
> @@ -37,6 +37,11 @@ UNWIND( .fnstart )
> add r1, r1, r0, lsl #2 @ Get word offset
> mov r3, r2, lsl r3 @ create mask
> smp_dmb
> +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
> + .arch_extension mp
> + ALT_SMP(W(pldw) [r1])
> + ALT_UP(W(nop))
> +#endif
> 1: ldrex r2, [r1]
> ands r0, r2, r3 @ save old value of bit
> \instr r2, r2, r3 @ toggle bit
> diff --git a/xen/arch/arm/arm32/lib/copy_template.S
> b/xen/arch/arm/arm32/lib/copy_template.S
> index 805e3f8..3bc8eb8 100644
> --- a/xen/arch/arm/arm32/lib/copy_template.S
> +++ b/xen/arch/arm/arm32/lib/copy_template.S
> @@ -197,24 +197,24 @@
>
> 12: PLD( pld [r1, #124] )
> 13: ldr4w r1, r4, r5, r6, r7, abort=19f
> - mov r3, lr, pull #\pull
> + mov r3, lr, lspull #\pull
> subs r2, r2, #32
> ldr4w r1, r8, r9, ip, lr, abort=19f
> - orr r3, r3, r4, push #\push
> - mov r4, r4, pull #\pull
> - orr r4, r4, r5, push #\push
> - mov r5, r5, pull #\pull
> - orr r5, r5, r6, push #\push
> - mov r6, r6, pull #\pull
> - orr r6, r6, r7, push #\push
> - mov r7, r7, pull #\pull
> - orr r7, r7, r8, push #\push
> - mov r8, r8, pull #\pull
> - orr r8, r8, r9, push #\push
> - mov r9, r9, pull #\pull
> - orr r9, r9, ip, push #\push
> - mov ip, ip, pull #\pull
> - orr ip, ip, lr, push #\push
> + orr r3, r3, r4, lspush #\push
> + mov r4, r4, lspull #\pull
> + orr r4, r4, r5, lspush #\push
> + mov r5, r5, lspull #\pull
> + orr r5, r5, r6, lspush #\push
> + mov r6, r6, lspull #\pull
> + orr r6, r6, r7, lspush #\push
> + mov r7, r7, lspull #\pull
> + orr r7, r7, r8, lspush #\push
> + mov r8, r8, lspull #\pull
> + orr r8, r8, r9, lspush #\push
> + mov r9, r9, lspull #\pull
> + orr r9, r9, ip, lspush #\push
> + mov ip, ip, lspull #\pull
> + orr ip, ip, lr, lspush #\push
> str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
> bge 12b
> PLD( cmn r2, #96 )
> @@ -225,10 +225,10 @@
> 14: ands ip, r2, #28
> beq 16f
>
> -15: mov r3, lr, pull #\pull
> +15: mov r3, lr, lspull #\pull
> ldr1w r1, lr, abort=21f
> subs ip, ip, #4
> - orr r3, r3, lr, push #\push
> + orr r3, r3, lr, lspush #\push
> str1w r0, r3, abort=21f
> bgt 15b
> CALGN( cmp r2, #0 )
> diff --git a/xen/arch/arm/arm32/lib/memmove.S
> b/xen/arch/arm/arm32/lib/memmove.S
> index 4e142b8..18634c3 100644
> --- a/xen/arch/arm/arm32/lib/memmove.S
> +++ b/xen/arch/arm/arm32/lib/memmove.S
> @@ -148,24 +148,24 @@ ENTRY(memmove)
>
> 12: PLD( pld [r1, #-128] )
> 13: ldmdb r1!, {r7, r8, r9, ip}
> - mov lr, r3, push #\push
> + mov lr, r3, lspush #\push
> subs r2, r2, #32
> ldmdb r1!, {r3, r4, r5, r6}
> - orr lr, lr, ip, pull #\pull
> - mov ip, ip, push #\push
> - orr ip, ip, r9, pull #\pull
> - mov r9, r9, push #\push
> - orr r9, r9, r8, pull #\pull
> - mov r8, r8, push #\push
> - orr r8, r8, r7, pull #\pull
> - mov r7, r7, push #\push
> - orr r7, r7, r6, pull #\pull
> - mov r6, r6, push #\push
> - orr r6, r6, r5, pull #\pull
> - mov r5, r5, push #\push
> - orr r5, r5, r4, pull #\pull
> - mov r4, r4, push #\push
> - orr r4, r4, r3, pull #\pull
> + orr lr, lr, ip, lspull #\pull
> + mov ip, ip, lspush #\push
> + orr ip, ip, r9, lspull #\pull
> + mov r9, r9, lspush #\push
> + orr r9, r9, r8, lspull #\pull
> + mov r8, r8, lspush #\push
> + orr r8, r8, r7, lspull #\pull
> + mov r7, r7, lspush #\push
> + orr r7, r7, r6, lspull #\pull
> + mov r6, r6, lspush #\push
> + orr r6, r6, r5, lspull #\pull
> + mov r5, r5, lspush #\push
> + orr r5, r5, r4, lspull #\pull
> + mov r4, r4, lspush #\push
> + orr r4, r4, r3, lspull #\pull
> stmdb r0!, {r4 - r9, ip, lr}
> bge 12b
> PLD( cmn r2, #96 )
> @@ -176,10 +176,10 @@ ENTRY(memmove)
> 14: ands ip, r2, #28
> beq 16f
>
> -15: mov lr, r3, push #\push
> +15: mov lr, r3, lspush #\push
> ldr r3, [r1, #-4]!
> subs ip, ip, #4
> - orr lr, lr, r3, pull #\pull
> + orr lr, lr, r3, lspull #\pull
> str lr, [r0, #-4]!
> bgt 15b
> CALGN( cmp r2, #0 )
> diff --git a/xen/include/asm-arm/arm32/atomic.h
> b/xen/include/asm-arm/arm32/atomic.h
> index 3d601d1..7ec712f 100644
> --- a/xen/include/asm-arm/arm32/atomic.h
> +++ b/xen/include/asm-arm/arm32/atomic.h
> @@ -39,6 +39,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
> int result;
>
> smp_mb();
> + prefetchw(&v->counter);
>
> __asm__ __volatile__("@ atomic_add_return\n"
> "1: ldrex %0, [%3]\n"
> @@ -78,6 +79,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
> int result;
>
> smp_mb();
> + prefetchw(&v->counter);
>
> __asm__ __volatile__("@ atomic_sub_return\n"
> "1: ldrex %0, [%3]\n"
> @@ -100,6 +102,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old,
> int new)
> unsigned long res;
>
> smp_mb();
> + prefetchw(&ptr->counter);
>
> do {
> __asm__ __volatile__("@ atomic_cmpxchg\n"
> @@ -117,6 +120,35 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old,
> int new)
> return oldval;
> }
>
> +static inline int __atomic_add_unless(atomic_t *v, int a, int u)
> +{
> + int oldval, newval;
> + unsigned long tmp;
> +
> + smp_mb();
> + prefetchw(&v->counter);
> +
> + __asm__ __volatile__ ("@ atomic_add_unless\n"
> +"1: ldrex %0, [%4]\n"
> +" teq %0, %5\n"
> +" beq 2f\n"
> +" add %1, %0, %6\n"
> +" strex %2, %1, [%4]\n"
> +" teq %2, #0\n"
> +" bne 1b\n"
> +"2:"
> + : "=&r" (oldval), "=&r" (newval), "=&r" (tmp), "+Qo" (v->counter)
> + : "r" (&v->counter), "r" (u), "r" (a)
> + : "cc");
> +
> + if (oldval != u)
> + smp_mb();
> +
> + return oldval;
> +}
> +
> +#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
> +
> #define atomic_inc(v) atomic_add(1, v)
> #define atomic_dec(v) atomic_sub(1, v)
>
> diff --git a/xen/include/asm-arm/arm32/cmpxchg.h
> b/xen/include/asm-arm/arm32/cmpxchg.h
> index 9a511f2..03e0bed 100644
> --- a/xen/include/asm-arm/arm32/cmpxchg.h
> +++ b/xen/include/asm-arm/arm32/cmpxchg.h
> @@ -1,6 +1,8 @@
> #ifndef __ASM_ARM32_CMPXCHG_H
> #define __ASM_ARM32_CMPXCHG_H
>
> +#include <xen/prefetch.h>
> +
> extern void __bad_xchg(volatile void *, int);
>
> static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int
> size)
> @@ -9,6 +11,7 @@ static inline unsigned long __xchg(unsigned long x, volatile
> void *ptr, int size
> unsigned int tmp;
>
> smp_mb();
> + prefetchw((const void *)ptr);
>
> switch (size) {
> case 1:
> @@ -56,6 +59,8 @@ static always_inline unsigned long __cmpxchg(
> {
> unsigned long oldval, res;
>
> + prefetchw((const void *)ptr);
> +
> switch (size) {
> case 1:
> do {
>
--
Julien Grall
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |