membarrier: Document scheduler barrier requirements
Document the membarrier requirement on having a full memory barrier in
__schedule() after coming from user-space, before storing to rq->curr.
It is provided by smp_mb__after_spinlock() in __schedule().

Document that membarrier requires a full barrier on transition from
kernel thread to userspace thread. We currently have an implicit barrier
from atomic_dec_and_test() in mmdrop() that ensures this.

The x86 switch_mm_irqs_off() full barrier is currently provided by many
cpumask update operations as well as write_cr3(). Document that
write_cr3() provides this barrier.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-4-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 306e060435
parent 3ccfebedd8
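For context, membarrier(2) is the system call these scheduler-side barriers serve. A minimal user-space sketch of its use follows (the syscall number and MEMBARRIER_CMD_* commands are real UAPI; the program itself is illustrative and error handling is trimmed): it queries support, then issues a global barrier, after which every concurrently running thread has executed a full memory barrier.

#include <linux/membarrier.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/* MEMBARRIER_CMD_QUERY returns a bitmask of supported commands. */
	int cmds = syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);

	if (cmds < 0 || !(cmds & MEMBARRIER_CMD_SHARED)) {
		fprintf(stderr, "membarrier(2) not supported\n");
		return 1;
	}

	/*
	 * When this returns, all running threads have executed a full
	 * memory barrier; threads concurrently being switched in or out
	 * are covered by the scheduler barriers documented in the
	 * hunks below.
	 */
	syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0);
	return 0;
}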
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -228,6 +228,11 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 #endif
 	this_cpu_write(cpu_tlbstate.is_lazy, false);
 
+	/*
+	 * The membarrier system call requires a full memory barrier
+	 * before returning to user-space, after storing to rq->curr.
+	 * Writing to CR3 provides that full memory barrier.
+	 */
 	if (real_prev == next) {
 		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
 			   next->context.ctx_id);

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -39,6 +39,11 @@ static inline void mmgrab(struct mm_struct *mm)
 extern void __mmdrop(struct mm_struct *);
 static inline void mmdrop(struct mm_struct *mm)
 {
+	/*
+	 * The implicit full barrier implied by atomic_dec_and_test() is
+	 * required by the membarrier system call before returning to
+	 * user-space, after storing to rq->curr.
+	 */
 	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
 		__mmdrop(mm);
 }

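The barrier mmdrop() relies on exists because atomic_dec_and_test() is a value-returning atomic RMW, which the kernel requires to be fully ordered. A rough user-space model of that guarantee in C11 atomics (an illustration only, not the kernel implementation; memory_order_seq_cst stands in for the kernel's full ordering):

#include <stdatomic.h>
#include <stdbool.h>

/*
 * Decrement *count and return true if it dropped to zero.  The seq_cst
 * RMW orders all earlier and later memory accesses around it, which is
 * the property the membarrier comment above depends on.
 */
static bool dec_and_test(atomic_int *count)
{
	return atomic_fetch_sub_explicit(count, 1, memory_order_seq_cst) == 1;
}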
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2703,6 +2703,12 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	finish_arch_post_lock_switch();
 
 	fire_sched_in_preempt_notifiers(current);
+	/*
+	 * When transitioning from a kernel thread to a userspace
+	 * thread, mmdrop()'s implicit full barrier is required by the
+	 * membarrier system call, because the current ->active_mm can
+	 * become the current mm without going through switch_mm().
+	 */
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {

@@ -2808,6 +2814,13 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 */
 	arch_start_context_switch(prev);
 
+	/*
+	 * If mm is non-NULL, we pass through switch_mm(). If mm is
+	 * NULL, we will pass through mmdrop() in finish_task_switch().
+	 * Both of these contain the full memory barrier required by
+	 * membarrier after storing to rq->curr, before returning to
+	 * user-space.
+	 */
 	if (!mm) {
 		next->active_mm = oldmm;
 		mmgrab(oldmm);

@@ -3344,6 +3357,9 @@ static void __sched notrace __schedule(bool preempt)
 	 * Make sure that signal_pending_state()->signal_pending() below
 	 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
 	 * done by the caller to avoid the race with signal_wake_up().
+	 *
+	 * The membarrier system call requires a full memory barrier
+	 * after coming from user-space, before storing to rq->curr.
 	 */
 	rq_lock(rq, &rf);
 	smp_mb__after_spinlock();

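The pattern in this hunk is that rq_lock() only guarantees ACQUIRE semantics, so smp_mb__after_spinlock() upgrades the acquisition to the full barrier membarrier needs. A user-space analogue using POSIX threads and a C11 fence (names are illustrative; this is not the kernel code):

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t rq_lock_analogue = PTHREAD_MUTEX_INITIALIZER;

/*
 * Taking a mutex is only an ACQUIRE; the explicit fence issued right
 * after the acquisition provides the full barrier, mirroring the
 * rq_lock()/smp_mb__after_spinlock() pair in __schedule().
 */
static void lock_with_full_barrier(void)
{
	pthread_mutex_lock(&rq_lock_analogue);
	atomic_thread_fence(memory_order_seq_cst);
}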
@@ -3391,17 +3407,16 @@ static void __sched notrace __schedule(bool preempt)
 		/*
 		 * The membarrier system call requires each architecture
 		 * to have a full memory barrier after updating
-		 * rq->curr, before returning to user-space. For TSO
-		 * (e.g. x86), the architecture must provide its own
-		 * barrier in switch_mm(). For weakly ordered machines
-		 * for which spin_unlock() acts as a full memory
-		 * barrier, finish_lock_switch() in common code takes
-		 * care of this barrier. For weakly ordered machines for
-		 * which spin_unlock() acts as a RELEASE barrier (only
-		 * arm64 and PowerPC), arm64 has a full barrier in
-		 * switch_to(), and PowerPC has
-		 * smp_mb__after_unlock_lock() before
-		 * finish_lock_switch().
+		 * rq->curr, before returning to user-space.
+		 *
+		 * Here are the schemes providing that barrier on the
+		 * various architectures:
+		 * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
+		 *   switch_mm() rely on membarrier_arch_switch_mm() on PowerPC.
+		 * - finish_lock_switch() for weakly-ordered
+		 *   architectures where spin_unlock is a full barrier,
+		 * - switch_to() for arm64 (weakly-ordered, spin_unlock
+		 *   is a RELEASE barrier),
 		 */
 		++*switch_count;