RISC-V: Assorted memory model fixes

These fixes fall into three categories

* The definiton of __smp_{store_release,load_acquire}, which allow us to
  emit a full fence when unnecessary.
* Fixes to avoid relying on the behavior of "*.aqrl" atomics, as those
  are specified in the currently released RISC-V memory model draft in
  a way that makes them useless for Linux.  This might change in the
  future, but now the code matches the memory model spec as it's written
  so at least we're getting closer to something sane.  The actual fix is
  to delete the RISC-V specific atomics and drop back to generic
  versions that use the new fences from above.
* Cleanups to our atomic macros, which are mostly non-functional
  changes.

Unfortunately I haven't given these as thorough of a testing as I
probably should have, but I've poked through the code and they seem
generally OK.
This commit is contained in:
Palmer Dabbelt 2018-04-02 20:36:33 -07:00
commit 2c9046b71b
No known key found for this signature in database
GPG Key ID: EF4CA1502CCBAB41
5 changed files with 628 additions and 232 deletions

View File

@ -24,6 +24,20 @@
#include <asm/barrier.h>
#define ATOMIC_INIT(i) { (i) }
#define __atomic_op_acquire(op, args...) \
({ \
typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \
__asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory"); \
__ret; \
})
#define __atomic_op_release(op, args...) \
({ \
__asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory"); \
op##_relaxed(args); \
})
static __always_inline int atomic_read(const atomic_t *v)
{
return READ_ONCE(v->counter);
@ -51,14 +65,15 @@ static __always_inline void atomic64_set(atomic64_t *v, long i)
* one version to worry about.
*/
#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
static __always_inline \
void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
{ \
__asm__ __volatile__ ( \
"amo" #asm_op "." #asm_type " zero, %1, %0" \
" amo" #asm_op "." #asm_type " zero, %1, %0" \
: "+A" (v->counter) \
: "r" (I) \
: "memory"); \
}
} \
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, I) \
@ -79,75 +94,115 @@ ATOMIC_OPS(xor, xor, i)
#undef ATOMIC_OPS
/*
* Atomic ops that have ordered, relaxed, acquire, and relese variants.
* Atomic ops that have ordered, relaxed, acquire, and release variants.
* There's two flavors of these: the arithmatic ops have both fetch and return
* versions, while the logical ops only have fetch versions.
*/
#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix) \
static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \
#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \
static __always_inline \
c_type atomic##prefix##_fetch_##op##_relaxed(c_type i, \
atomic##prefix##_t *v) \
{ \
register c_type ret; \
__asm__ __volatile__ ( \
"amo" #asm_op "." #asm_type #asm_or " %1, %2, %0" \
" amo" #asm_op "." #asm_type " %1, %2, %0" \
: "+A" (v->counter), "=r" (ret) \
: "r" (I) \
: "memory"); \
return ret; \
} \
static __always_inline \
c_type atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
{ \
register c_type ret; \
__asm__ __volatile__ ( \
" amo" #asm_op "." #asm_type ".aqrl %1, %2, %0" \
: "+A" (v->counter), "=r" (ret) \
: "r" (I) \
: "memory"); \
return ret; \
}
#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \
static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, atomic##prefix##_t *v) \
#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix) \
static __always_inline \
c_type atomic##prefix##_##op##_return_relaxed(c_type i, \
atomic##prefix##_t *v) \
{ \
return atomic##prefix##_fetch_##op##c_or(i, v) c_op I; \
return atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I; \
} \
static __always_inline \
c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
{ \
return atomic##prefix##_fetch_##op(i, v) c_op I; \
}
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, )
#define ATOMIC_OPS(op, asm_op, c_op, I) \
ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \
ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
#define ATOMIC_OPS(op, asm_op, c_op, I) \
ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \
ATOMIC_FETCH_OP( op, asm_op, I, d, long, 64) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, long, 64)
#endif
ATOMIC_OPS(add, add, +, i, , _relaxed)
ATOMIC_OPS(add, add, +, i, .aq , _acquire)
ATOMIC_OPS(add, add, +, i, .rl , _release)
ATOMIC_OPS(add, add, +, i, .aqrl, )
ATOMIC_OPS(add, add, +, i)
ATOMIC_OPS(sub, add, +, -i)
ATOMIC_OPS(sub, add, +, -i, , _relaxed)
ATOMIC_OPS(sub, add, +, -i, .aq , _acquire)
ATOMIC_OPS(sub, add, +, -i, .rl , _release)
ATOMIC_OPS(sub, add, +, -i, .aqrl, )
#define atomic_add_return_relaxed atomic_add_return_relaxed
#define atomic_sub_return_relaxed atomic_sub_return_relaxed
#define atomic_add_return atomic_add_return
#define atomic_sub_return atomic_sub_return
#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
#define atomic_fetch_add atomic_fetch_add
#define atomic_fetch_sub atomic_fetch_sub
#ifndef CONFIG_GENERIC_ATOMIC64
#define atomic64_add_return_relaxed atomic64_add_return_relaxed
#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
#define atomic64_add_return atomic64_add_return
#define atomic64_sub_return atomic64_sub_return
#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
#define atomic64_fetch_add atomic64_fetch_add
#define atomic64_fetch_sub atomic64_fetch_sub
#endif
#undef ATOMIC_OPS
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \
ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, )
#define ATOMIC_OPS(op, asm_op, I) \
ATOMIC_FETCH_OP(op, asm_op, I, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \
ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) \
ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64)
#define ATOMIC_OPS(op, asm_op, I) \
ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \
ATOMIC_FETCH_OP(op, asm_op, I, d, long, 64)
#endif
ATOMIC_OPS(and, and, i, , _relaxed)
ATOMIC_OPS(and, and, i, .aq , _acquire)
ATOMIC_OPS(and, and, i, .rl , _release)
ATOMIC_OPS(and, and, i, .aqrl, )
ATOMIC_OPS(and, and, i)
ATOMIC_OPS( or, or, i)
ATOMIC_OPS(xor, xor, i)
ATOMIC_OPS( or, or, i, , _relaxed)
ATOMIC_OPS( or, or, i, .aq , _acquire)
ATOMIC_OPS( or, or, i, .rl , _release)
ATOMIC_OPS( or, or, i, .aqrl, )
#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
#define atomic_fetch_and atomic_fetch_and
#define atomic_fetch_or atomic_fetch_or
#define atomic_fetch_xor atomic_fetch_xor
ATOMIC_OPS(xor, xor, i, , _relaxed)
ATOMIC_OPS(xor, xor, i, .aq , _acquire)
ATOMIC_OPS(xor, xor, i, .rl , _release)
ATOMIC_OPS(xor, xor, i, .aqrl, )
#ifndef CONFIG_GENERIC_ATOMIC64
#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
#define atomic64_fetch_and atomic64_fetch_and
#define atomic64_fetch_or atomic64_fetch_or
#define atomic64_fetch_xor atomic64_fetch_xor
#endif
#undef ATOMIC_OPS
@ -157,22 +212,24 @@ ATOMIC_OPS(xor, xor, i, .aqrl, )
/*
* The extra atomic operations that are constructed from one of the core
* AMO-based operations above (aside from sub, which is easier to fit above).
* These are required to perform a barrier, but they're OK this way because
* atomic_*_return is also required to perform a barrier.
* These are required to perform a full barrier, but they're OK this way
* because atomic_*_return is also required to perform a full barrier.
*
*/
#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix) \
static __always_inline bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
static __always_inline \
bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
{ \
return atomic##prefix##_##func_op##_return(i, v) comp_op I; \
}
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, func_op, comp_op, I) \
ATOMIC_OP (op, func_op, comp_op, I, int, )
ATOMIC_OP(op, func_op, comp_op, I, int, )
#else
#define ATOMIC_OPS(op, func_op, comp_op, I) \
ATOMIC_OP (op, func_op, comp_op, I, int, ) \
ATOMIC_OP (op, func_op, comp_op, I, long, 64)
ATOMIC_OP(op, func_op, comp_op, I, int, ) \
ATOMIC_OP(op, func_op, comp_op, I, long, 64)
#endif
ATOMIC_OPS(add_and_test, add, ==, 0)
@ -183,48 +240,84 @@ ATOMIC_OPS(add_negative, add, <, 0)
#undef ATOMIC_OPS
#define ATOMIC_OP(op, func_op, I, c_type, prefix) \
static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \
static __always_inline \
void atomic##prefix##_##op(atomic##prefix##_t *v) \
{ \
atomic##prefix##_##func_op(I, v); \
}
#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \
static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \
static __always_inline \
c_type atomic##prefix##_fetch_##op##_relaxed(atomic##prefix##_t *v) \
{ \
return atomic##prefix##_fetch_##func_op##_relaxed(I, v); \
} \
static __always_inline \
c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \
{ \
return atomic##prefix##_fetch_##func_op(I, v); \
}
#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix) \
static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v) \
static __always_inline \
c_type atomic##prefix##_##op##_return_relaxed(atomic##prefix##_t *v) \
{ \
return atomic##prefix##_fetch_##op##_relaxed(v) c_op I; \
} \
static __always_inline \
c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v) \
{ \
return atomic##prefix##_fetch_##op(v) c_op I; \
}
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, c_op, I) \
ATOMIC_OP (op, asm_op, I, int, ) \
ATOMIC_FETCH_OP (op, asm_op, I, int, ) \
ATOMIC_OP( op, asm_op, I, int, ) \
ATOMIC_FETCH_OP( op, asm_op, I, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, )
#else
#define ATOMIC_OPS(op, asm_op, c_op, I) \
ATOMIC_OP (op, asm_op, I, int, ) \
ATOMIC_FETCH_OP (op, asm_op, I, int, ) \
ATOMIC_OP( op, asm_op, I, int, ) \
ATOMIC_FETCH_OP( op, asm_op, I, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \
ATOMIC_OP (op, asm_op, I, long, 64) \
ATOMIC_FETCH_OP (op, asm_op, I, long, 64) \
ATOMIC_OP( op, asm_op, I, long, 64) \
ATOMIC_FETCH_OP( op, asm_op, I, long, 64) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
#endif
ATOMIC_OPS(inc, add, +, 1)
ATOMIC_OPS(dec, add, +, -1)
#define atomic_inc_return_relaxed atomic_inc_return_relaxed
#define atomic_dec_return_relaxed atomic_dec_return_relaxed
#define atomic_inc_return atomic_inc_return
#define atomic_dec_return atomic_dec_return
#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed
#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed
#define atomic_fetch_inc atomic_fetch_inc
#define atomic_fetch_dec atomic_fetch_dec
#ifndef CONFIG_GENERIC_ATOMIC64
#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
#define atomic64_inc_return atomic64_inc_return
#define atomic64_dec_return atomic64_dec_return
#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed
#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed
#define atomic64_fetch_inc atomic64_fetch_inc
#define atomic64_fetch_dec atomic64_fetch_dec
#endif
#undef ATOMIC_OPS
#undef ATOMIC_OP
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#define ATOMIC_OP(op, func_op, comp_op, I, prefix) \
static __always_inline bool atomic##prefix##_##op(atomic##prefix##_t *v) \
static __always_inline \
bool atomic##prefix##_##op(atomic##prefix##_t *v) \
{ \
return atomic##prefix##_##func_op##_return(v) comp_op I; \
}
@ -238,19 +331,19 @@ ATOMIC_OP(dec_and_test, dec, ==, 0, 64)
#undef ATOMIC_OP
/* This is required to provide a barrier on success. */
/* This is required to provide a full barrier on success. */
static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
int prev, rc;
__asm__ __volatile__ (
"0:\n\t"
"lr.w.aqrl %[p], %[c]\n\t"
"beq %[p], %[u], 1f\n\t"
"add %[rc], %[p], %[a]\n\t"
"sc.w.aqrl %[rc], %[rc], %[c]\n\t"
"bnez %[rc], 0b\n\t"
"1:"
"0: lr.w %[p], %[c]\n"
" beq %[p], %[u], 1f\n"
" add %[rc], %[p], %[a]\n"
" sc.w.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
" fence rw, rw\n"
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
: [a]"r" (a), [u]"r" (u)
: "memory");
@ -263,13 +356,13 @@ static __always_inline long __atomic64_add_unless(atomic64_t *v, long a, long u)
long prev, rc;
__asm__ __volatile__ (
"0:\n\t"
"lr.d.aqrl %[p], %[c]\n\t"
"beq %[p], %[u], 1f\n\t"
"add %[rc], %[p], %[a]\n\t"
"sc.d.aqrl %[rc], %[rc], %[c]\n\t"
"bnez %[rc], 0b\n\t"
"1:"
"0: lr.d %[p], %[c]\n"
" beq %[p], %[u], 1f\n"
" add %[rc], %[p], %[a]\n"
" sc.d.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
" fence rw, rw\n"
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
: [a]"r" (a), [u]"r" (u)
: "memory");
@ -300,37 +393,63 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
/*
* atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
* {cmp,}xchg and the operations that return, so they need a barrier.
* {cmp,}xchg and the operations that return, so they need a full barrier.
*/
/*
* FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by
* assigning the same barrier to both the LR and SC operations, but that might
* not make any sense. We're waiting on a memory model specification to
* determine exactly what the right thing to do is here.
*/
#define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \
static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \
#define ATOMIC_OP(c_t, prefix, size) \
static __always_inline \
c_t atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n) \
{ \
return __cmpxchg(&(v->counter), o, n, size, asm_or, asm_or); \
return __xchg_relaxed(&(v->counter), n, size); \
} \
static __always_inline c_t atomic##prefix##_xchg##c_or(atomic##prefix##_t *v, c_t n) \
static __always_inline \
c_t atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n) \
{ \
return __xchg(n, &(v->counter), size, asm_or); \
return __xchg_acquire(&(v->counter), n, size); \
} \
static __always_inline \
c_t atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n) \
{ \
return __xchg_release(&(v->counter), n, size); \
} \
static __always_inline \
c_t atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n) \
{ \
return __xchg(&(v->counter), n, size); \
} \
static __always_inline \
c_t atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v, \
c_t o, c_t n) \
{ \
return __cmpxchg_relaxed(&(v->counter), o, n, size); \
} \
static __always_inline \
c_t atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v, \
c_t o, c_t n) \
{ \
return __cmpxchg_acquire(&(v->counter), o, n, size); \
} \
static __always_inline \
c_t atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v, \
c_t o, c_t n) \
{ \
return __cmpxchg_release(&(v->counter), o, n, size); \
} \
static __always_inline \
c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \
{ \
return __cmpxchg(&(v->counter), o, n, size); \
}
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(c_or, asm_or) \
ATOMIC_OP( int, , c_or, 4, asm_or)
#define ATOMIC_OPS() \
ATOMIC_OP( int, , 4)
#else
#define ATOMIC_OPS(c_or, asm_or) \
ATOMIC_OP( int, , c_or, 4, asm_or) \
ATOMIC_OP(long, 64, c_or, 8, asm_or)
#define ATOMIC_OPS() \
ATOMIC_OP( int, , 4) \
ATOMIC_OP(long, 64, 8)
#endif
ATOMIC_OPS( , .aqrl)
ATOMIC_OPS(_acquire, .aq)
ATOMIC_OPS(_release, .rl)
ATOMIC_OPS(_relaxed, )
ATOMIC_OPS()
#undef ATOMIC_OPS
#undef ATOMIC_OP
@ -340,13 +459,13 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset)
int prev, rc;
__asm__ __volatile__ (
"0:\n\t"
"lr.w.aqrl %[p], %[c]\n\t"
"sub %[rc], %[p], %[o]\n\t"
"bltz %[rc], 1f\n\t"
"sc.w.aqrl %[rc], %[rc], %[c]\n\t"
"bnez %[rc], 0b\n\t"
"1:"
"0: lr.w %[p], %[c]\n"
" sub %[rc], %[p], %[o]\n"
" bltz %[rc], 1f\n"
" sc.w.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
" fence rw, rw\n"
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
: [o]"r" (offset)
: "memory");
@ -361,13 +480,13 @@ static __always_inline long atomic64_sub_if_positive(atomic64_t *v, int offset)
long prev, rc;
__asm__ __volatile__ (
"0:\n\t"
"lr.d.aqrl %[p], %[c]\n\t"
"sub %[rc], %[p], %[o]\n\t"
"bltz %[rc], 1f\n\t"
"sc.d.aqrl %[rc], %[rc], %[c]\n\t"
"bnez %[rc], 0b\n\t"
"1:"
"0: lr.d %[p], %[c]\n"
" sub %[rc], %[p], %[o]\n"
" bltz %[rc], 1f\n"
" sc.d.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
" fence rw, rw\n"
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
: [o]"r" (offset)
: "memory");

View File

@ -38,6 +38,21 @@
#define __smp_rmb() RISCV_FENCE(r,r)
#define __smp_wmb() RISCV_FENCE(w,w)
#define __smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
RISCV_FENCE(rw,w); \
WRITE_ONCE(*p, v); \
} while (0)
#define __smp_load_acquire(p) \
({ \
typeof(*p) ___p1 = READ_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
RISCV_FENCE(r,rw); \
___p1; \
})
/*
* This is a very specific barrier: it's currently only used in two places in
* the kernel, both in the scheduler. See include/linux/spinlock.h for the two

View File

@ -17,8 +17,9 @@
#include <linux/bug.h>
#include <asm/barrier.h>
#include <asm/fence.h>
#define __xchg(new, ptr, size, asm_or) \
#define __xchg_relaxed(ptr, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
@ -26,14 +27,14 @@
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
"amoswap.w" #asm_or " %0, %2, %1" \
" amoswap.w %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
"amoswap.d" #asm_or " %0, %2, %1" \
" amoswap.d %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
@ -44,7 +45,114 @@
__ret; \
})
#define xchg(ptr, x) (__xchg((x), (ptr), sizeof(*(ptr)), .aqrl))
#define xchg_relaxed(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_relaxed((ptr), \
_x_, sizeof(*(ptr))); \
})
#define __xchg_acquire(ptr, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
__typeof__(*(ptr)) __ret; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
" amoswap.w %0, %2, %1\n" \
RISCV_ACQUIRE_BARRIER \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
" amoswap.d %0, %2, %1\n" \
RISCV_ACQUIRE_BARRIER \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define xchg_acquire(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_acquire((ptr), \
_x_, sizeof(*(ptr))); \
})
#define __xchg_release(ptr, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
__typeof__(*(ptr)) __ret; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
" amoswap.w %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
" amoswap.d %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define xchg_release(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_release((ptr), \
_x_, sizeof(*(ptr))); \
})
#define __xchg(ptr, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
__typeof__(*(ptr)) __ret; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
" amoswap.w.aqrl %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
" amoswap.d.aqrl %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define xchg(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg((ptr), _x_, sizeof(*(ptr))); \
})
#define xchg32(ptr, x) \
({ \
@ -63,7 +171,7 @@
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
#define __cmpxchg(ptr, old, new, size, lrb, scb) \
#define __cmpxchg_relaxed(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \
@ -73,24 +181,160 @@
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
"0:" \
"lr.w" #scb " %0, %2\n" \
"bne %0, %z3, 1f\n" \
"sc.w" #lrb " %1, %z4, %2\n" \
"bnez %1, 0b\n" \
"1:" \
"0: lr.w %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.w %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
"0:" \
"lr.d" #scb " %0, %2\n" \
"bne %0, %z3, 1f\n" \
"sc.d" #lrb " %1, %z4, %2\n" \
"bnez %1, 0b\n" \
"1:" \
"0: lr.d %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.d %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define cmpxchg_relaxed(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
(__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
_o_, _n_, sizeof(*(ptr))); \
})
#define __cmpxchg_acquire(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
"0: lr.w %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.w %1, %z4, %2\n" \
" bnez %1, 0b\n" \
RISCV_ACQUIRE_BARRIER \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
"0: lr.d %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.d %1, %z4, %2\n" \
" bnez %1, 0b\n" \
RISCV_ACQUIRE_BARRIER \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define cmpxchg_acquire(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
(__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \
_o_, _n_, sizeof(*(ptr))); \
})
#define __cmpxchg_release(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
"0: lr.w %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.w %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
"0: lr.d %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.d %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define cmpxchg_release(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
(__typeof__(*(ptr))) __cmpxchg_release((ptr), \
_o_, _n_, sizeof(*(ptr))); \
})
#define __cmpxchg(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
"0: lr.w %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.w.rl %1, %z4, %2\n" \
" bnez %1, 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
"0: lr.d %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.d.rl %1, %z4, %2\n" \
" bnez %1, 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
@ -102,10 +346,15 @@
})
#define cmpxchg(ptr, o, n) \
(__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), .aqrl, .aqrl))
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
(__typeof__(*(ptr))) __cmpxchg((ptr), \
_o_, _n_, sizeof(*(ptr))); \
})
#define cmpxchg_local(ptr, o, n) \
(__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), , ))
(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
#define cmpxchg32(ptr, o, n) \
({ \
@ -116,7 +365,7 @@
#define cmpxchg32_local(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
cmpxchg_local((ptr), (o), (n)); \
cmpxchg_relaxed((ptr), (o), (n)) \
})
#define cmpxchg64(ptr, o, n) \
@ -128,7 +377,7 @@
#define cmpxchg64_local(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
cmpxchg_local((ptr), (o), (n)); \
cmpxchg_relaxed((ptr), (o), (n)); \
})
#endif /* _ASM_RISCV_CMPXCHG_H */

View File

@ -0,0 +1,12 @@
#ifndef _ASM_RISCV_FENCE_H
#define _ASM_RISCV_FENCE_H
#ifdef CONFIG_SMP
#define RISCV_ACQUIRE_BARRIER "\tfence r , rw\n"
#define RISCV_RELEASE_BARRIER "\tfence rw, w\n"
#else
#define RISCV_ACQUIRE_BARRIER
#define RISCV_RELEASE_BARRIER
#endif
#endif /* _ASM_RISCV_FENCE_H */

View File

@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <asm/current.h>
#include <asm/fence.h>
/*
* Simple spin lock operations. These provide no fairness guarantees.
@ -28,10 +29,7 @@
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
__asm__ __volatile__ (
"amoswap.w.rl x0, x0, %0"
: "=A" (lock->lock)
:: "memory");
smp_store_release(&lock->lock, 0);
}
static inline int arch_spin_trylock(arch_spinlock_t *lock)
@ -39,7 +37,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
int tmp = 1, busy;
__asm__ __volatile__ (
"amoswap.w.aq %0, %2, %1"
" amoswap.w %0, %2, %1\n"
RISCV_ACQUIRE_BARRIER
: "=r" (busy), "+A" (lock->lock)
: "r" (tmp)
: "memory");
@ -68,8 +67,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock)
"1: lr.w %1, %0\n"
" bltz %1, 1b\n"
" addi %1, %1, 1\n"
" sc.w.aq %1, %1, %0\n"
" sc.w %1, %1, %0\n"
" bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
: "+A" (lock->lock), "=&r" (tmp)
:: "memory");
}
@ -82,8 +82,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock)
"1: lr.w %1, %0\n"
" bnez %1, 1b\n"
" li %1, -1\n"
" sc.w.aq %1, %1, %0\n"
" sc.w %1, %1, %0\n"
" bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
: "+A" (lock->lock), "=&r" (tmp)
:: "memory");
}
@ -96,8 +97,9 @@ static inline int arch_read_trylock(arch_rwlock_t *lock)
"1: lr.w %1, %0\n"
" bltz %1, 1f\n"
" addi %1, %1, 1\n"
" sc.w.aq %1, %1, %0\n"
" sc.w %1, %1, %0\n"
" bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
"1:\n"
: "+A" (lock->lock), "=&r" (busy)
:: "memory");
@ -113,8 +115,9 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
"1: lr.w %1, %0\n"
" bnez %1, 1f\n"
" li %1, -1\n"
" sc.w.aq %1, %1, %0\n"
" sc.w %1, %1, %0\n"
" bnez %1, 1b\n"
RISCV_ACQUIRE_BARRIER
"1:\n"
: "+A" (lock->lock), "=&r" (busy)
:: "memory");
@ -125,7 +128,8 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
static inline void arch_read_unlock(arch_rwlock_t *lock)
{
__asm__ __volatile__(
"amoadd.w.rl x0, %1, %0"
RISCV_RELEASE_BARRIER
" amoadd.w x0, %1, %0\n"
: "+A" (lock->lock)
: "r" (-1)
: "memory");
@ -133,10 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock)
static inline void arch_write_unlock(arch_rwlock_t *lock)
{
__asm__ __volatile__ (
"amoswap.w.rl x0, x0, %0"
: "=A" (lock->lock)
:: "memory");
smp_store_release(&lock->lock, 0);
}
#endif /* _ASM_RISCV_SPINLOCK_H */