From c0385b24af15020a1e505f2c984db0d7c0d017e1 Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Tue, 3 Feb 2015 12:39:03 +0000 Subject: [PATCH] arm64: introduce CONFIG_ARM64_LSE_ATOMICS as fallback to ll/sc atomics In order to patch in the new atomic instructions at runtime, we need to generate wrappers around the out-of-line exclusive load/store atomics. This patch adds a new Kconfig option, CONFIG_ARM64_LSE_ATOMICS, which causes our atomic functions to branch to the out-of-line ll/sc implementations. To avoid the register spill overhead of the PCS, the out-of-line functions are compiled with specific compiler flags to force out-of-line save/restore of any registers that are usually caller-saved. Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com> --- arch/arm64/Kconfig | 12 ++ arch/arm64/include/asm/atomic.h | 9 ++ arch/arm64/include/asm/atomic_ll_sc.h | 19 ++- arch/arm64/include/asm/atomic_lse.h | 170 ++++++++++++++++++++++++++ arch/arm64/lib/Makefile | 13 ++ arch/arm64/lib/atomic_ll_sc.c | 3 + 6 files changed, 224 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/atomic_lse.h create mode 100644 arch/arm64/lib/atomic_ll_sc.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5372e1e0c11c..8dabffa82ef8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -618,6 +618,18 @@ config ARM64_PAN The feature is detected at runtime, and will remain as a 'nop' instruction if the cpu does not implement the feature. +config ARM64_LSE_ATOMICS + bool "ARMv8.1 atomic instructions" + help + As part of the Large System Extensions, ARMv8.1 introduces new + atomic instructions that are designed specifically to scale in + very large systems. + + Say Y here to make use of these instructions for the in-kernel + atomic routines. This incurs a small overhead on CPUs that do + not support these instructions and requires the kernel to be + built with binutils >= 2.25. 
+ menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 632c47064722..84635f2d3d0a 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -21,6 +21,7 @@ #define __ASM_ATOMIC_H #include <linux/compiler.h> +#include <linux/stringify.h> #include <linux/types.h> #include <asm/barrier.h> @@ -30,7 +31,15 @@ #ifdef __KERNEL__ +#define __ARM64_IN_ATOMIC_IMPL + +#ifdef CONFIG_ARM64_LSE_ATOMICS +#include <asm/atomic_lse.h> +#else #include <asm/atomic_ll_sc.h> +#endif + +#undef __ARM64_IN_ATOMIC_IMPL /* * On ARM, ordinary assignment (str instruction) doesn't clear the local diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 66e992a58f6b..c33fa2cd399e 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -21,6 +21,10 @@ #ifndef __ASM_ATOMIC_LL_SC_H #define __ASM_ATOMIC_LL_SC_H +#ifndef __ARM64_IN_ATOMIC_IMPL +#error "please don't include this file directly" +#endif + /* * AArch64 UP and SMP safe atomic ops. We use load exclusive and * store exclusive to ensure that these are atomic. 
We may loop @@ -41,6 +45,10 @@ #define __LL_SC_PREFIX(x) x #endif +#ifndef __LL_SC_EXPORT +#define __LL_SC_EXPORT(x) +#endif + #define ATOMIC_OP(op, asm_op) \ __LL_SC_INLINE void \ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ @@ -56,6 +64,7 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "Ir" (i)); \ } \ +__LL_SC_EXPORT(atomic_##op); #define ATOMIC_OP_RETURN(op, asm_op) \ __LL_SC_INLINE int \ @@ -75,7 +84,8 @@ __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \ \ smp_mb(); \ return result; \ -} +} \ +__LL_SC_EXPORT(atomic_##op##_return); #define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP(op, asm_op) \ @@ -115,6 +125,7 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new)) smp_mb(); return oldval; } +__LL_SC_EXPORT(atomic_cmpxchg); #define ATOMIC64_OP(op, asm_op) \ __LL_SC_INLINE void \ @@ -131,6 +142,7 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "Ir" (i)); \ } \ +__LL_SC_EXPORT(atomic64_##op); #define ATOMIC64_OP_RETURN(op, asm_op) \ __LL_SC_INLINE long \ @@ -150,7 +162,8 @@ __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \ \ smp_mb(); \ return result; \ -} +} \ +__LL_SC_EXPORT(atomic64_##op##_return); #define ATOMIC64_OPS(op, asm_op) \ ATOMIC64_OP(op, asm_op) \ @@ -190,6 +203,7 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new)) smp_mb(); return oldval; } +__LL_SC_EXPORT(atomic64_cmpxchg); __LL_SC_INLINE long __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) @@ -211,5 +225,6 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) return result; } +__LL_SC_EXPORT(atomic64_dec_if_positive); #endif /* __ASM_ATOMIC_LL_SC_H */ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h new file mode 100644 index 000000000000..dce6ede740e8 --- /dev/null +++ b/arch/arm64/include/asm/atomic_lse.h @@ -0,0 +1,170 @@ +/* + * Based on arch/arm/include/asm/atomic.h + * 
+ * Copyright (C) 1996 Russell King. + * Copyright (C) 2002 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_ATOMIC_LSE_H +#define __ASM_ATOMIC_LSE_H + +#ifndef __ARM64_IN_ATOMIC_IMPL +#error "please don't include this file directly" +#endif + +/* Move the ll/sc atomics out-of-line */ +#define __LL_SC_INLINE +#define __LL_SC_PREFIX(x) __ll_sc_##x +#define __LL_SC_EXPORT(x) EXPORT_SYMBOL(__LL_SC_PREFIX(x)) + +/* Macros for constructing calls to out-of-line ll/sc atomics */ +#define __LL_SC_CALL(op) \ + "bl\t" __stringify(__LL_SC_PREFIX(atomic_##op)) "\n" +#define __LL_SC_CALL64(op) \ + "bl\t" __stringify(__LL_SC_PREFIX(atomic64_##op)) "\n" + +#define ATOMIC_OP(op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile( \ + __LL_SC_CALL(op) \ + : "+r" (w0), "+Q" (v->counter) \ + : "r" (x1) \ + : "x30"); \ +} \ + +#define ATOMIC_OP_RETURN(op, asm_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile( \ + __LL_SC_CALL(op##_return) \ + : "+r" (w0) \ + : "r" (x1) \ + : "x30", "memory"); \ + \ + return w0; \ +} + +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_OP_RETURN(op, asm_op) + +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, sub) + +ATOMIC_OP(and, 
and) +ATOMIC_OP(andnot, bic) +ATOMIC_OP(or, orr) +ATOMIC_OP(xor, eor) + +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) +{ + register unsigned long x0 asm ("x0") = (unsigned long)ptr; + register int w1 asm ("w1") = old; + register int w2 asm ("w2") = new; + + asm volatile( + __LL_SC_CALL(cmpxchg) + : "+r" (x0) + : "r" (w1), "r" (w2) + : "x30", "cc", "memory"); + + return x0; +} + +#define ATOMIC64_OP(op, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile( \ + __LL_SC_CALL64(op) \ + : "+r" (x0), "+Q" (v->counter) \ + : "r" (x1) \ + : "x30"); \ +} \ + +#define ATOMIC64_OP_RETURN(op, asm_op) \ +static inline long atomic64_##op##_return(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile( \ + __LL_SC_CALL64(op##_return) \ + : "+r" (x0) \ + : "r" (x1) \ + : "x30", "memory"); \ + \ + return x0; \ +} + +#define ATOMIC64_OPS(op, asm_op) \ + ATOMIC64_OP(op, asm_op) \ + ATOMIC64_OP_RETURN(op, asm_op) + +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, sub) + +ATOMIC64_OP(and, and) +ATOMIC64_OP(andnot, bic) +ATOMIC64_OP(or, orr) +ATOMIC64_OP(xor, eor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP + +static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) +{ + register unsigned long x0 asm ("x0") = (unsigned long)ptr; + register long x1 asm ("x1") = old; + register long x2 asm ("x2") = new; + + asm volatile( + __LL_SC_CALL64(cmpxchg) + : "+r" (x0) + : "r" (x1), "r" (x2) + : "x30", "cc", "memory"); + + return x0; +} + +static inline long atomic64_dec_if_positive(atomic64_t *v) +{ + register unsigned long x0 asm ("x0") = (unsigned long)v; + + asm volatile( + __LL_SC_CALL64(dec_if_positive) + : "+r" (x0) + : + : "x30", "cc", "memory"); + + return x0; +} + +#endif /* 
__ASM_ATOMIC_LSE_H */ diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index d98d3e39879e..1a811ecf71da 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -3,3 +3,16 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ clear_page.o memchr.o memcpy.o memmove.o memset.o \ memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ strchr.o strrchr.o + +# Tell the compiler to treat all general purpose registers as +# callee-saved, which allows for efficient runtime patching of the bl +# instruction in the caller with an atomic instruction when supported by +# the CPU. Result and argument registers are handled correctly, based on +# the function prototype. +lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o +CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ + -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ + -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ + -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ + -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ + -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18 diff --git a/arch/arm64/lib/atomic_ll_sc.c b/arch/arm64/lib/atomic_ll_sc.c new file mode 100644 index 000000000000..b0c538b0da28 --- /dev/null +++ b/arch/arm64/lib/atomic_ll_sc.c @@ -0,0 +1,3 @@ +#include <asm/atomic.h> +#define __ARM64_IN_ATOMIC_IMPL +#include <asm/atomic_ll_sc.h>