csky: atomic: Optimize cmpxchg with acquire & release
Optimize cmpxchg by emitting csky acquire/release fence instructions
directly in the asm sequence instead of using the previous generic-based
implementation. This also avoids paying for a fence when cmpxchg's first
load != old (see the sketch after the tags below).
Comments by Rutland:
8e86f0b409 ("arm64: atomics: fix use of acquire + release for full barrier semantics")
Comments by Boqun:
FWIW, you probably need to make sure that putting a barrier instruction
inside an lr/sc loop is a good thing. IIUC, the execution time of a
barrier instruction is determined by the state of the store buffers and
invalidate queues (and probably other things), so it may increase the
execution time of the lr/sc loop and make it less likely to succeed. But
this really depends on how the arch executes these instructions.
Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=LbAdu7jntZRUa=-dJwL0VfmDfBV5MHB=rcZ-w@mail.gmail.com/T/#m27a0f1342995deae49ce1d0e1f2683f8a181d6c3
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
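
A generic, compilable C sketch of the before/after shape (illustration
only; the *_fence() helpers and cmpxchg_*_shape() functions below are made
up for this sketch and merely stand in for the csky bar.* fences and the
ldex/stex loop). It shows why the failure path, where the first load
already differs from old, no longer pays for a fence. The sketch
compresses the fence placement: in the patch itself __cmpxchg emits
RELEASE_FENCE before the ll/sc loop and FULL_FENCE after it (full-barrier
semantics, per the arm64 commit quoted above), while __cmpxchg_acquire
emits only ACQUIRE_FENCE after the loop.

#include <stdatomic.h>
#include <stdio.h>

static void release_fence(void) { atomic_thread_fence(memory_order_release); }
static void acquire_fence(void) { atomic_thread_fence(memory_order_acquire); }
static void full_fence(void)    { atomic_thread_fence(memory_order_seq_cst); }

/*
 * Old shape: generic arch_cmpxchg() = release fence + relaxed cmpxchg +
 * acquire fence.  Both fences are executed even when the compare fails.
 */
static int cmpxchg_old_shape(_Atomic int *ptr, int old, int new)
{
	int ret = old;

	release_fence();
	atomic_compare_exchange_strong_explicit(ptr, &ret, new,
						memory_order_relaxed,
						memory_order_relaxed);
	acquire_fence();
	return ret;			/* previous value, as cmpxchg returns */
}

/*
 * New shape: the loop loads first and bails out before any fence when the
 * value already differs from 'old' ("cmpne; bt 2f" in the patch); the
 * trailing fence sits after the store-conditional, on the success path only.
 */
static int cmpxchg_new_shape(_Atomic int *ptr, int old, int new)
{
	int ret;

	for (;;) {
		ret = atomic_load_explicit(ptr, memory_order_relaxed);	/* ldex */
		if (ret != old)
			return ret;	/* failure path: no fence at all */
		if (atomic_compare_exchange_weak_explicit(ptr, &ret, new,
							  memory_order_relaxed,
							  memory_order_relaxed))
			break;		/* stex succeeded */
	}
	full_fence();	/* FULL_FENCE slot (ACQUIRE_FENCE in the _acquire variant) */
	return old;
}

int main(void)
{
	_Atomic int v = 1;

	printf("hit:  ret=%d v=%d\n", cmpxchg_new_shape(&v, 1, 2), atomic_load(&v));
	printf("miss: ret=%d v=%d\n", cmpxchg_new_shape(&v, 7, 9), atomic_load(&v));
	printf("old:  ret=%d v=%d\n", cmpxchg_old_shape(&v, 2, 3), atomic_load(&v));
	return 0;
}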
commit 186f69b64c
parent 8318f7c231
--- a/arch/csky/include/asm/barrier.h
+++ b/arch/csky/include/asm/barrier.h
@@ -37,17 +37,21 @@
  * bar.brar
  * bar.bwaw
  */
+#define FULL_FENCE		".long 0x842fc000\n"
+#define ACQUIRE_FENCE		".long 0x8427c000\n"
+#define RELEASE_FENCE		".long 0x842ec000\n"
+
 #define __bar_brw()	asm volatile (".long 0x842cc000\n":::"memory")
 #define __bar_br()	asm volatile (".long 0x8424c000\n":::"memory")
 #define __bar_bw()	asm volatile (".long 0x8428c000\n":::"memory")
 #define __bar_arw()	asm volatile (".long 0x8423c000\n":::"memory")
 #define __bar_ar()	asm volatile (".long 0x8421c000\n":::"memory")
 #define __bar_aw()	asm volatile (".long 0x8422c000\n":::"memory")
-#define __bar_brwarw()	asm volatile (".long 0x842fc000\n":::"memory")
-#define __bar_brarw()	asm volatile (".long 0x8427c000\n":::"memory")
+#define __bar_brwarw()	asm volatile (FULL_FENCE:::"memory")
+#define __bar_brarw()	asm volatile (ACQUIRE_FENCE:::"memory")
 #define __bar_bwarw()	asm volatile (".long 0x842bc000\n":::"memory")
 #define __bar_brwar()	asm volatile (".long 0x842dc000\n":::"memory")
-#define __bar_brwaw()	asm volatile (".long 0x842ec000\n":::"memory")
+#define __bar_brwaw()	asm volatile (RELEASE_FENCE:::"memory")
 #define __bar_brar()	asm volatile (".long 0x8425c000\n":::"memory")
 #define __bar_brar()	asm volatile (".long 0x8425c000\n":::"memory")
 #define __bar_bwaw()	asm volatile (".long 0x842ac000\n":::"memory")
@@ -56,7 +60,6 @@
 #define __smp_rmb()	__bar_brar()
 #define __smp_wmb()	__bar_bwaw()
 
-#define ACQUIRE_FENCE		".long 0x8427c000\n"
 #define __smp_acquire_fence()	__bar_brarw()
 #define __smp_release_fence()	__bar_brwaw()
 
--- a/arch/csky/include/asm/cmpxchg.h
+++ b/arch/csky/include/asm/cmpxchg.h
@@ -64,15 +64,71 @@ extern void __bad_xchg(void);
 #define arch_cmpxchg_relaxed(ptr, o, n) \
 	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
 
-#define arch_cmpxchg(ptr, o, n) \
+#define __cmpxchg_acquire(ptr, old, new, size) \
 ({ \
+	__typeof__(ptr) __ptr = (ptr); \
+	__typeof__(new) __new = (new); \
+	__typeof__(new) __tmp; \
+	__typeof__(old) __old = (old); \
 	__typeof__(*(ptr)) __ret; \
-	__smp_release_fence(); \
-	__ret = arch_cmpxchg_relaxed(ptr, o, n); \
-	__smp_acquire_fence(); \
+	switch (size) { \
+	case 4: \
+		asm volatile ( \
+		"1:	ldex.w		%0, (%3) \n" \
+		"	cmpne		%0, %4	\n" \
+		"	bt		2f	\n" \
+		"	mov		%1, %2	\n" \
+		"	stex.w		%1, (%3) \n" \
+		"	bez		%1, 1b	\n" \
+		ACQUIRE_FENCE \
+		"2:				\n" \
+		: "=&r" (__ret), "=&r" (__tmp) \
+		: "r" (__new), "r"(__ptr), "r"(__old) \
+		:); \
+		break; \
+	default: \
+		__bad_xchg(); \
+	} \
 	__ret; \
 })
 
+#define arch_cmpxchg_acquire(ptr, o, n) \
+	(__cmpxchg_acquire((ptr), (o), (n), sizeof(*(ptr))))
+
+#define __cmpxchg(ptr, old, new, size) \
+({ \
+	__typeof__(ptr) __ptr = (ptr); \
+	__typeof__(new) __new = (new); \
+	__typeof__(new) __tmp; \
+	__typeof__(old) __old = (old); \
+	__typeof__(*(ptr)) __ret; \
+	switch (size) { \
+	case 4: \
+		asm volatile ( \
+		RELEASE_FENCE \
+		"1:	ldex.w		%0, (%3) \n" \
+		"	cmpne		%0, %4	\n" \
+		"	bt		2f	\n" \
+		"	mov		%1, %2	\n" \
+		"	stex.w		%1, (%3) \n" \
+		"	bez		%1, 1b	\n" \
+		FULL_FENCE \
+		"2:				\n" \
+		: "=&r" (__ret), "=&r" (__tmp) \
+		: "r" (__new), "r"(__ptr), "r"(__old) \
+		:); \
+		break; \
+	default: \
+		__bad_xchg(); \
+	} \
+	__ret; \
+})
+
+#define arch_cmpxchg(ptr, o, n) \
+	(__cmpxchg((ptr), (o), (n), sizeof(*(ptr))))
+
+#define arch_cmpxchg_local(ptr, o, n) \
+	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
 #else
 #include <asm-generic/cmpxchg.h>
 #endif