mirror of https://gitee.com/openkylin/linux.git
A set of PI futex fixes:
- Address a longstanding issue where the user space part of the PI futex is not writeable. The kernel returns with inconsistent state which can in the worst case result in a UAF of a tasks kernel stack. The solution is to establish consistent kernel state which makes future operations on the futex fail because user space and kernel space state are inconsistent. Not a problem as PI futexes fundamentaly require a functional RW mapping and if user space pulls the rug under it, then it can keep the pieces it asked for. - Address an issue where the return value is incorrect in case that the futex was acquired after a timeout/signal made the waiter drop out of the rtmutex wait. In one of the corner cases the kernel returned an error code despite having successfully acquired the futex. -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmATCSYTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoT8zEADDFWxDp9SwgMKBf98ZJqfPvPbIbmqW xhJr1R+RLBNBmYEahQKFPWqlPf0JMDzIjYeu8fxkwPVwsTBzSgJ7Lka1acDI/TQG X6kXTIgX/SNdkYZ534b+FcPutYW+KFrpprebIF5U8NLII2xnlNrcVaVzeUvs5qgx ssjP79eJAWFSCXl2CJFVneMIcbrU+7czgjW0Cr84Z6O0GanJqPWVzngzgts2bvZN K29BGQrWcQhq0W641f5toLYoF+kq98cgtMlwY49quZW9pbNToKeUXDrOro6y8fkg J8dYdzFH5KS9SHxACP19y4ILXqCi3GIkDs+RsUtcyY1ZoY20DLV4rEvG26pX9azO 5b6MeWsWHwHHxnp1apYenq0QjvfKKuJZdet1xctC+vivl+4RX4m2IvFONJLDQ++o fJ2j1m/39szg8gw3oObtL0zIPMczFMKgh7rCNkkTedghXevynMoi88RE643ELq8d MFr0C0n2ThHFbewtdyOZtZ6zERAKmPynx57iIETC63sChxiUfwmhFkxIuFUQkkwg G239O4dIVLGT4JKkOgyftRyog8JOzvdQb7qh1aYoj6tJNevVXIfuFOo6gRFp2SXp T2Rot+3TJaUS+f/GqsyHreJYhpLK7KSesjdYS1xYA9oWVtgt0AKVcY1AbSpyBifk ACXMtP8cjU0KgA== =wzqT -----END PGP SIGNATURE----- Merge tag 'locking-urgent-2021-01-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull locking fixes from Thomas Gleixner: "A set of PI futex fixes: - Address a longstanding issue where the user space part of the PI futex is not writeable. The kernel returns with inconsistent state which can in the worst case result in a UAF of a tasks kernel stack. The solution is to establish consistent kernel state which makes future operations on the futex fail because user space and kernel space state are inconsistent. Not a problem as PI futexes fundamentaly require a functional RW mapping and if user space pulls the rug under it, then it can keep the pieces it asked for. - Address an issue where the return value is incorrect in case that the futex was acquired after a timeout/signal made the waiter drop out of the rtmutex wait. In one of the corner cases the kernel returned an error code despite having successfully acquired the futex" * tag 'locking-urgent-2021-01-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: futex: Handle faults correctly for PI futexes futex: Simplify fixup_pi_state_owner() futex: Use pi_state_update_owner() in put_pi_state() rtmutex: Remove unused argument from rt_mutex_proxy_unlock() futex: Provide and use pi_state_update_owner() futex: Replace pointless printk in fixup_owner() futex: Ensure the correct return value from futex_lock_pi()
This commit is contained in:
commit
c64396cc36
211
kernel/futex.c
211
kernel/futex.c
|
@ -763,6 +763,29 @@ static struct futex_pi_state *alloc_pi_state(void)
|
|||
return pi_state;
|
||||
}
|
||||
|
||||
static void pi_state_update_owner(struct futex_pi_state *pi_state,
|
||||
struct task_struct *new_owner)
|
||||
{
|
||||
struct task_struct *old_owner = pi_state->owner;
|
||||
|
||||
lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
|
||||
|
||||
if (old_owner) {
|
||||
raw_spin_lock(&old_owner->pi_lock);
|
||||
WARN_ON(list_empty(&pi_state->list));
|
||||
list_del_init(&pi_state->list);
|
||||
raw_spin_unlock(&old_owner->pi_lock);
|
||||
}
|
||||
|
||||
if (new_owner) {
|
||||
raw_spin_lock(&new_owner->pi_lock);
|
||||
WARN_ON(!list_empty(&pi_state->list));
|
||||
list_add(&pi_state->list, &new_owner->pi_state_list);
|
||||
pi_state->owner = new_owner;
|
||||
raw_spin_unlock(&new_owner->pi_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void get_pi_state(struct futex_pi_state *pi_state)
|
||||
{
|
||||
WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
|
||||
|
@ -785,17 +808,11 @@ static void put_pi_state(struct futex_pi_state *pi_state)
|
|||
* and has cleaned up the pi_state already
|
||||
*/
|
||||
if (pi_state->owner) {
|
||||
struct task_struct *owner;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
|
||||
owner = pi_state->owner;
|
||||
if (owner) {
|
||||
raw_spin_lock(&owner->pi_lock);
|
||||
list_del_init(&pi_state->list);
|
||||
raw_spin_unlock(&owner->pi_lock);
|
||||
}
|
||||
rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
|
||||
pi_state_update_owner(pi_state, NULL);
|
||||
rt_mutex_proxy_unlock(&pi_state->pi_mutex);
|
||||
raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
|
||||
}
|
||||
|
||||
|
@ -941,7 +958,8 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
|
|||
* FUTEX_OWNER_DIED bit. See [4]
|
||||
*
|
||||
* [10] There is no transient state which leaves owner and user space
|
||||
* TID out of sync.
|
||||
* TID out of sync. Except one error case where the kernel is denied
|
||||
* write access to the user address, see fixup_pi_state_owner().
|
||||
*
|
||||
*
|
||||
* Serialization and lifetime rules:
|
||||
|
@ -1521,26 +1539,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
|
|||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
if (!ret) {
|
||||
/*
|
||||
* This is a point of no return; once we modify the uval there is no
|
||||
* going back and subsequent operations must not fail.
|
||||
* This is a point of no return; once we modified the uval
|
||||
* there is no going back and subsequent operations must
|
||||
* not fail.
|
||||
*/
|
||||
|
||||
raw_spin_lock(&pi_state->owner->pi_lock);
|
||||
WARN_ON(list_empty(&pi_state->list));
|
||||
list_del_init(&pi_state->list);
|
||||
raw_spin_unlock(&pi_state->owner->pi_lock);
|
||||
|
||||
raw_spin_lock(&new_owner->pi_lock);
|
||||
WARN_ON(!list_empty(&pi_state->list));
|
||||
list_add(&pi_state->list, &new_owner->pi_state_list);
|
||||
pi_state->owner = new_owner;
|
||||
raw_spin_unlock(&new_owner->pi_lock);
|
||||
|
||||
pi_state_update_owner(pi_state, new_owner);
|
||||
postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
|
@ -2323,18 +2330,13 @@ static void unqueue_me_pi(struct futex_q *q)
|
|||
spin_unlock(q->lock_ptr);
|
||||
}
|
||||
|
||||
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
||||
static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
||||
struct task_struct *argowner)
|
||||
{
|
||||
struct futex_pi_state *pi_state = q->pi_state;
|
||||
u32 uval, curval, newval;
|
||||
struct task_struct *oldowner, *newowner;
|
||||
u32 newtid;
|
||||
int ret, err = 0;
|
||||
|
||||
lockdep_assert_held(q->lock_ptr);
|
||||
|
||||
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
u32 uval, curval, newval, newtid;
|
||||
int err = 0;
|
||||
|
||||
oldowner = pi_state->owner;
|
||||
|
||||
|
@ -2368,14 +2370,12 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
|||
* We raced against a concurrent self; things are
|
||||
* already fixed up. Nothing to do.
|
||||
*/
|
||||
ret = 0;
|
||||
goto out_unlock;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
|
||||
/* We got the lock after all, nothing to fix. */
|
||||
ret = 0;
|
||||
goto out_unlock;
|
||||
/* We got the lock. pi_state is correct. Tell caller. */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2402,8 +2402,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
|||
* We raced against a concurrent self; things are
|
||||
* already fixed up. Nothing to do.
|
||||
*/
|
||||
ret = 0;
|
||||
goto out_unlock;
|
||||
return 1;
|
||||
}
|
||||
newowner = argowner;
|
||||
}
|
||||
|
@ -2433,22 +2432,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
|||
* We fixed up user space. Now we need to fix the pi_state
|
||||
* itself.
|
||||
*/
|
||||
if (pi_state->owner != NULL) {
|
||||
raw_spin_lock(&pi_state->owner->pi_lock);
|
||||
WARN_ON(list_empty(&pi_state->list));
|
||||
list_del_init(&pi_state->list);
|
||||
raw_spin_unlock(&pi_state->owner->pi_lock);
|
||||
}
|
||||
pi_state_update_owner(pi_state, newowner);
|
||||
|
||||
pi_state->owner = newowner;
|
||||
|
||||
raw_spin_lock(&newowner->pi_lock);
|
||||
WARN_ON(!list_empty(&pi_state->list));
|
||||
list_add(&pi_state->list, &newowner->pi_state_list);
|
||||
raw_spin_unlock(&newowner->pi_lock);
|
||||
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
|
||||
return 0;
|
||||
return argowner == current;
|
||||
|
||||
/*
|
||||
* In order to reschedule or handle a page fault, we need to drop the
|
||||
|
@ -2469,17 +2455,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
|||
|
||||
switch (err) {
|
||||
case -EFAULT:
|
||||
ret = fault_in_user_writeable(uaddr);
|
||||
err = fault_in_user_writeable(uaddr);
|
||||
break;
|
||||
|
||||
case -EAGAIN:
|
||||
cond_resched();
|
||||
ret = 0;
|
||||
err = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
ret = err;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -2489,17 +2474,44 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
|||
/*
|
||||
* Check if someone else fixed it for us:
|
||||
*/
|
||||
if (pi_state->owner != oldowner) {
|
||||
ret = 0;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
if (pi_state->owner != oldowner)
|
||||
return argowner == current;
|
||||
|
||||
/* Retry if err was -EAGAIN or the fault in succeeded */
|
||||
if (!err)
|
||||
goto retry;
|
||||
|
||||
out_unlock:
|
||||
/*
|
||||
* fault_in_user_writeable() failed so user state is immutable. At
|
||||
* best we can make the kernel state consistent but user state will
|
||||
* be most likely hosed and any subsequent unlock operation will be
|
||||
* rejected due to PI futex rule [10].
|
||||
*
|
||||
* Ensure that the rtmutex owner is also the pi_state owner despite
|
||||
* the user space value claiming something different. There is no
|
||||
* point in unlocking the rtmutex if current is the owner as it
|
||||
* would need to wait until the next waiter has taken the rtmutex
|
||||
* to guarantee consistent state. Keep it simple. Userspace asked
|
||||
* for this wreckaged state.
|
||||
*
|
||||
* The rtmutex has an owner - either current or some other
|
||||
* task. See the EAGAIN loop above.
|
||||
*/
|
||||
pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
||||
struct task_struct *argowner)
|
||||
{
|
||||
struct futex_pi_state *pi_state = q->pi_state;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(q->lock_ptr);
|
||||
|
||||
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
ret = __fixup_pi_state_owner(uaddr, q, argowner);
|
||||
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
return ret;
|
||||
}
|
||||
|
@ -2523,8 +2535,6 @@ static long futex_wait_restart(struct restart_block *restart);
|
|||
*/
|
||||
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (locked) {
|
||||
/*
|
||||
* Got the lock. We might not be the anticipated owner if we
|
||||
|
@ -2535,8 +2545,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
|
|||
* stable state, anything else needs more attention.
|
||||
*/
|
||||
if (q->pi_state->owner != current)
|
||||
ret = fixup_pi_state_owner(uaddr, q, current);
|
||||
return ret ? ret : locked;
|
||||
return fixup_pi_state_owner(uaddr, q, current);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2547,23 +2557,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
|
|||
* Another speculative read; pi_state->owner == current is unstable
|
||||
* but needs our attention.
|
||||
*/
|
||||
if (q->pi_state->owner == current) {
|
||||
ret = fixup_pi_state_owner(uaddr, q, NULL);
|
||||
return ret;
|
||||
}
|
||||
if (q->pi_state->owner == current)
|
||||
return fixup_pi_state_owner(uaddr, q, NULL);
|
||||
|
||||
/*
|
||||
* Paranoia check. If we did not take the lock, then we should not be
|
||||
* the owner of the rt_mutex.
|
||||
* the owner of the rt_mutex. Warn and establish consistent state.
|
||||
*/
|
||||
if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
|
||||
printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
|
||||
"pi-state %p\n", ret,
|
||||
q->pi_state->pi_mutex.owner,
|
||||
q->pi_state->owner);
|
||||
}
|
||||
if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
|
||||
return fixup_pi_state_owner(uaddr, q, current);
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2771,7 +2775,6 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
|
|||
ktime_t *time, int trylock)
|
||||
{
|
||||
struct hrtimer_sleeper timeout, *to;
|
||||
struct futex_pi_state *pi_state = NULL;
|
||||
struct task_struct *exiting = NULL;
|
||||
struct rt_mutex_waiter rt_waiter;
|
||||
struct futex_hash_bucket *hb;
|
||||
|
@ -2907,23 +2910,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
|
|||
if (res)
|
||||
ret = (res < 0) ? res : 0;
|
||||
|
||||
/*
|
||||
* If fixup_owner() faulted and was unable to handle the fault, unlock
|
||||
* it and return the fault to userspace.
|
||||
*/
|
||||
if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
|
||||
pi_state = q.pi_state;
|
||||
get_pi_state(pi_state);
|
||||
}
|
||||
|
||||
/* Unqueue and drop the lock */
|
||||
unqueue_me_pi(&q);
|
||||
|
||||
if (pi_state) {
|
||||
rt_mutex_futex_unlock(&pi_state->pi_mutex);
|
||||
put_pi_state(pi_state);
|
||||
}
|
||||
|
||||
goto out;
|
||||
|
||||
out_unlock_put_key:
|
||||
|
@ -3183,7 +3171,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
|
|||
u32 __user *uaddr2)
|
||||
{
|
||||
struct hrtimer_sleeper timeout, *to;
|
||||
struct futex_pi_state *pi_state = NULL;
|
||||
struct rt_mutex_waiter rt_waiter;
|
||||
struct futex_hash_bucket *hb;
|
||||
union futex_key key2 = FUTEX_KEY_INIT;
|
||||
|
@ -3261,16 +3248,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
|
|||
if (q.pi_state && (q.pi_state->owner != current)) {
|
||||
spin_lock(q.lock_ptr);
|
||||
ret = fixup_pi_state_owner(uaddr2, &q, current);
|
||||
if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
|
||||
pi_state = q.pi_state;
|
||||
get_pi_state(pi_state);
|
||||
}
|
||||
/*
|
||||
* Drop the reference to the pi state which
|
||||
* the requeue_pi() code acquired for us.
|
||||
*/
|
||||
put_pi_state(q.pi_state);
|
||||
spin_unlock(q.lock_ptr);
|
||||
/*
|
||||
* Adjust the return value. It's either -EFAULT or
|
||||
* success (1) but the caller expects 0 for success.
|
||||
*/
|
||||
ret = ret < 0 ? ret : 0;
|
||||
}
|
||||
} else {
|
||||
struct rt_mutex *pi_mutex;
|
||||
|
@ -3301,25 +3289,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
|
|||
if (res)
|
||||
ret = (res < 0) ? res : 0;
|
||||
|
||||
/*
|
||||
* If fixup_pi_state_owner() faulted and was unable to handle
|
||||
* the fault, unlock the rt_mutex and return the fault to
|
||||
* userspace.
|
||||
*/
|
||||
if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
|
||||
pi_state = q.pi_state;
|
||||
get_pi_state(pi_state);
|
||||
}
|
||||
|
||||
/* Unqueue and drop the lock. */
|
||||
unqueue_me_pi(&q);
|
||||
}
|
||||
|
||||
if (pi_state) {
|
||||
rt_mutex_futex_unlock(&pi_state->pi_mutex);
|
||||
put_pi_state(pi_state);
|
||||
}
|
||||
|
||||
if (ret == -EINTR) {
|
||||
/*
|
||||
* We've already been requeued, but cannot restart by calling
|
||||
|
|
|
@ -1716,8 +1716,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
|
|||
* possible because it belongs to the pi_state which is about to be freed
|
||||
* and it is not longer visible to other tasks.
|
||||
*/
|
||||
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
|
||||
struct task_struct *proxy_owner)
|
||||
void rt_mutex_proxy_unlock(struct rt_mutex *lock)
|
||||
{
|
||||
debug_rt_mutex_proxy_unlock(lock);
|
||||
rt_mutex_set_owner(lock, NULL);
|
||||
|
|
|
@ -133,8 +133,7 @@ enum rtmutex_chainwalk {
|
|||
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
|
||||
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
|
||||
struct task_struct *proxy_owner);
|
||||
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
|
||||
struct task_struct *proxy_owner);
|
||||
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
|
||||
extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
|
||||
extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
|
||||
struct rt_mutex_waiter *waiter,
|
||||
|
|
Loading…
Reference in New Issue