locking/mutex: Fix mutex handoff

While reviewing the ww_mutex patches, I noticed that it was still
possible to (incorrectly) succeed for (incorrect) code like:

	mutex_lock(&a);
	mutex_lock(&a);

This was possible if the second mutex_lock() would block (as expected)
but then receive a spurious wakeup. At that point it would find itself
at the front of the queue, request a handoff and instantly claim
ownership and continue, since owner would point to itself.

Avoid this scenario and simplify the code by introducing a third low
bit to signal handoff pickup. So once we request handoff, unlock
clears the handoff bit and sets the pickup bit along with the new
owner.

This also removes the need for the .handoff argument to
__mutex_trylock(), since that becomes superfluous with PICKUP.

In order to guarantee enough low bits, ensure task_struct alignment is
at least L1_CACHE_BYTES (which seems a good ideal regardless).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9d659ae14b ("locking/mutex: Add lock handoff to avoid starvation")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Peter Zijlstra 2017-01-11 14:17:48 +01:00 committed by Ingo Molnar
parent 52b94129f2
commit e274795ea7
3 changed files with 56 additions and 58 deletions

View File

@ -65,7 +65,7 @@ struct mutex {
static inline struct task_struct *__mutex_owner(struct mutex *lock)
{
return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03);
return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07);
}
/*

View File

@ -432,11 +432,13 @@ void __init fork_init(void)
int i;
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
#define ARCH_MIN_TASKALIGN 0
#endif
int align = min_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
/* create a slab on which task_structs can be allocated */
task_struct_cachep = kmem_cache_create("task_struct",
arch_task_struct_size, ARCH_MIN_TASKALIGN,
arch_task_struct_size, align,
SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
#endif

View File

@ -50,16 +50,17 @@ EXPORT_SYMBOL(__mutex_init);
/*
* @owner: contains: 'struct task_struct *' to the current lock owner,
* NULL means not owned. Since task_struct pointers are aligned at
* ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low
* bits to store extra state.
* at least L1_CACHE_BYTES, we have low bits to store extra state.
*
* Bit0 indicates a non-empty waiter list; unlock must issue a wakeup.
* Bit1 indicates unlock needs to hand the lock to the top-waiter
* Bit2 indicates handoff has been done and we're waiting for pickup.
*/
#define MUTEX_FLAG_WAITERS 0x01
#define MUTEX_FLAG_HANDOFF 0x02
#define MUTEX_FLAG_PICKUP 0x04
#define MUTEX_FLAGS 0x03
#define MUTEX_FLAGS 0x07
static inline struct task_struct *__owner_task(unsigned long owner)
{
@ -72,38 +73,29 @@ static inline unsigned long __owner_flags(unsigned long owner)
}
/*
* Actual trylock that will work on any unlocked state.
*
* When setting the owner field, we must preserve the low flag bits.
*
* Be careful with @handoff, only set that in a wait-loop (where you set
* HANDOFF) to avoid recursive lock attempts.
* Trylock variant that retuns the owning task on failure.
*/
static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock)
{
unsigned long owner, curr = (unsigned long)current;
owner = atomic_long_read(&lock->owner);
for (;;) { /* must loop, can race against a flag */
unsigned long old, flags = __owner_flags(owner);
unsigned long task = owner & ~MUTEX_FLAGS;
if (__owner_task(owner)) {
if (handoff && unlikely(__owner_task(owner) == current)) {
/*
* Provide ACQUIRE semantics for the lock-handoff.
*
* We cannot easily use load-acquire here, since
* the actual load is a failed cmpxchg, which
* doesn't imply any barriers.
*
* Also, this is a fairly unlikely scenario, and
* this contains the cost.
*/
smp_mb(); /* ACQUIRE */
return true;
}
if (task) {
if (likely(task != curr))
break;
return false;
if (likely(!(flags & MUTEX_FLAG_PICKUP)))
break;
flags &= ~MUTEX_FLAG_PICKUP;
} else {
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP);
#endif
}
/*
@ -111,15 +103,24 @@ static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
* past the point where we acquire it. This would be possible
* if we (accidentally) set the bit on an unlocked mutex.
*/
if (handoff)
flags &= ~MUTEX_FLAG_HANDOFF;
old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags);
if (old == owner)
return true;
return NULL;
owner = old;
}
return __owner_task(owner);
}
/*
* Actual trylock that will work on any unlocked state.
*/
static inline bool __mutex_trylock(struct mutex *lock)
{
return !__mutex_trylock_or_owner(lock);
}
#ifndef CONFIG_DEBUG_LOCK_ALLOC
@ -171,9 +172,9 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait
/*
* Give up ownership to a specific task, when @task = NULL, this is equivalent
* to a regular unlock. Clears HANDOFF, preserves WAITERS. Provides RELEASE
* semantics like a regular unlock, the __mutex_trylock() provides matching
* ACQUIRE semantics for the handoff.
* to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves
* WAITERS. Provides RELEASE semantics like a regular unlock, the
* __mutex_trylock() provides a matching ACQUIRE semantics for the handoff.
*/
static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
{
@ -184,10 +185,13 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
#endif
new = (owner & MUTEX_FLAG_WAITERS);
new |= (unsigned long)task;
if (task)
new |= MUTEX_FLAG_PICKUP;
old = atomic_long_cmpxchg_release(&lock->owner, owner, new);
if (old == owner)
@ -435,8 +439,6 @@ static bool mutex_optimistic_spin(struct mutex *lock,
struct ww_acquire_ctx *ww_ctx,
const bool use_ww_ctx, const bool waiter)
{
struct task_struct *task = current;
if (!waiter) {
/*
* The purpose of the mutex_can_spin_on_owner() function is
@ -476,24 +478,17 @@ static bool mutex_optimistic_spin(struct mutex *lock,
goto fail_unlock;
}
/* Try to acquire the mutex... */
owner = __mutex_trylock_or_owner(lock);
if (!owner)
break;
/*
* If there's an owner, wait for it to either
* There's an owner, wait for it to either
* release the lock or go to sleep.
*/
owner = __mutex_owner(lock);
if (owner) {
if (waiter && owner == task) {
smp_mb(); /* ACQUIRE */
break;
}
if (!mutex_spin_on_owner(lock, owner))
goto fail_unlock;
}
/* Try to acquire the mutex if it is unlocked. */
if (__mutex_trylock(lock, waiter))
break;
/*
* The cpu_relax() call is a compiler barrier which forces
@ -637,7 +632,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
preempt_disable();
mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
if (__mutex_trylock(lock, false) ||
if (__mutex_trylock(lock) ||
mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) {
/* got the lock, yay! */
lock_acquired(&lock->dep_map, ip);
@ -651,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
/*
* After waiting to acquire the wait_lock, try again.
*/
if (__mutex_trylock(lock, false))
if (__mutex_trylock(lock))
goto skip_wait;
debug_mutex_lock_common(lock, &waiter);
@ -674,7 +669,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* before testing the error conditions to make sure we pick up
* the handoff.
*/
if (__mutex_trylock(lock, first))
if (__mutex_trylock(lock))
goto acquired;
/*
@ -707,8 +702,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* state back to RUNNING and fall through the next schedule(),
* or we must see its unlock and acquire.
*/
if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) ||
__mutex_trylock(lock, first))
if (__mutex_trylock(lock) ||
(first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)))
break;
spin_lock_mutex(&lock->wait_lock, flags);
@ -865,6 +860,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
#endif
if (owner & MUTEX_FLAG_HANDOFF)
@ -1003,7 +999,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
*/
int __sched mutex_trylock(struct mutex *lock)
{
bool locked = __mutex_trylock(lock, false);
bool locked = __mutex_trylock(lock);
if (locked)
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);