sched/wait, RCU: Introduce rcuwait machinery
rcuwait provides support for (single) RCU-safe task wait/wake functionality, with the caveat that it must not be called after exit_notify(), such that we avoid racing with rcu delayed_put_task_struct callbacks, task_struct being rcu unaware in this context -- for which we similarly have task_rcu_dereference() magic, but with different return semantics, which can conflict with the wakeup side. The interfaces are quite straightforward: rcuwait_wait_event() and rcuwait_wake_up(). More details are in the comments, but it is worth mentioning at least that users must provide proper serialization when waiting on a condition, and avoid corrupting a concurrent waiter. Care must also be taken with the ordering between the task and the condition when calling the wakeup -- we cannot miss wakeups. When porting users, this is, for example, a given when using waitqueues, in that everything is done under the q->lock. As such, it can remove sources of non-preemptible unbounded work for realtime. Signed-off-by: Davidlohr Bueso <dbueso@suse.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: dave@stgolabs.net Link: http://lkml.kernel.org/r/1484148146-14210-2-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
642fa448ae
commit
8f95c90ceb
|
@ -0,0 +1,63 @@
|
|||
#ifndef _LINUX_RCUWAIT_H_
|
||||
#define _LINUX_RCUWAIT_H_
|
||||
|
||||
#include <linux/rcupdate.h>
|
||||
|
||||
/*
 * struct rcuwait - block and wake up a single task in an rcu-safe manner.
 *
 * Using it after exit_notify() is forbidden: task_struct is not properly
 * rcu protected, unless dealing with rcu-aware lists, ie: find_task_by_*().
 *
 * task_rcu_dereference() exists as an alternative, but its return
 * semantics have different implications which would break the wakeup
 * side.
 *
 * @task is only non-NULL while a user is blocked on a condition (or is
 * checking whether it needs to block); it is reset as soon as the
 * condition is known to have succeeded and the waiter has been awoken.
 */
struct rcuwait {
	struct task_struct *task;	/* the (single) waiter, or NULL */
};
|
||||
|
||||
/*
 * Static initializer for a struct rcuwait that has no waiter attached.
 * @name is accepted for symmetry with other initializers but is not used
 * in the expansion.
 */
#define __RCUWAIT_INITIALIZER(name)		\
	{ .task = NULL, }
|
||||
|
||||
static inline void rcuwait_init(struct rcuwait *w)
|
||||
{
|
||||
w->task = NULL;
|
||||
}
|
||||
|
||||
extern void rcuwait_wake_up(struct rcuwait *w);
|
||||
|
||||
/*
 * rcuwait_wait_event - block the current task until @condition holds
 * @w:         rcuwait in which the current task publishes itself
 * @condition: expression re-evaluated on every pass through the wait loop
 *
 * The caller is responsible for locking around rcuwait_wait_event(),
 * such that writes to @w->task are properly serialized.
 *
 * The task sleeps in TASK_UNINTERRUPTIBLE and, once @condition is seen
 * to be true, clears @w->task and returns in TASK_RUNNING.
 */
#define rcuwait_wait_event(w, condition)				\
({									\
	/*								\
	 * Complain if we are called after do_exit()/exit_notify(),	\
	 * as we cannot rely on the rcu critical region for the		\
	 * wakeup side.							\
	 */								\
	WARN_ON(current->exit_state);					\
									\
	/* Publish ourselves as the waiter before looking at @condition. */ \
	rcu_assign_pointer((w)->task, current);				\
	for (;;) {							\
		/*							\
		 * Implicit barrier (A) pairs with (B) in		\
		 * rcuwait_wake_up().					\
		 */							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
									\
		schedule();						\
	}								\
									\
	/* Condition met: detach from @w and go back to running. */	\
	WRITE_ONCE((w)->task, NULL);					\
	__set_current_state(TASK_RUNNING);				\
})
|
||||
|
||||
#endif /* _LINUX_RCUWAIT_H_ */
|
|
@ -55,6 +55,7 @@
|
|||
#include <linux/shm.h>
|
||||
#include <linux/kcov.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/rcuwait.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/unistd.h>
|
||||
|
@ -282,6 +283,35 @@ struct task_struct *task_rcu_dereference(struct task_struct **ptask)
|
|||
return task;
|
||||
}
|
||||
|
||||
void rcuwait_wake_up(struct rcuwait *w)
|
||||
{
|
||||
struct task_struct *task;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
/*
|
||||
* Order condition vs @task, such that everything prior to the load
|
||||
* of @task is visible. This is the condition as to why the user called
|
||||
* rcuwait_trywake() in the first place. Pairs with set_current_state()
|
||||
* barrier (A) in rcuwait_wait_event().
|
||||
*
|
||||
* WAIT WAKE
|
||||
* [S] tsk = current [S] cond = true
|
||||
* MB (A) MB (B)
|
||||
* [L] cond [L] tsk
|
||||
*/
|
||||
smp_rmb(); /* (B) */
|
||||
|
||||
/*
|
||||
* Avoid using task_rcu_dereference() magic as long as we are careful,
|
||||
* see comment in rcuwait_wait_event() regarding ->exit_state.
|
||||
*/
|
||||
task = rcu_dereference(w->task);
|
||||
if (task)
|
||||
wake_up_process(task);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
struct task_struct *try_get_task_struct(struct task_struct **ptask)
|
||||
{
|
||||
struct task_struct *task;
|
||||
|
|
Loading…
Reference in New Issue