2019-05-27 14:55:01 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
2005-11-09 10:04:06 +08:00
|
|
|
#ifndef _ASM_POWERPC_CURRENT_H
|
|
|
|
#define _ASM_POWERPC_CURRENT_H
|
2005-12-17 05:43:46 +08:00
|
|
|
#ifdef __KERNEL__
|
2005-11-09 10:04:06 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct task_struct;
|
|
|
|
|
|
|
|
#ifdef __powerpc64__
|
2007-02-26 03:04:18 +08:00
|
|
|
#include <linux/stddef.h>
|
2005-11-09 10:04:06 +08:00
|
|
|
#include <asm/paca.h>
|
|
|
|
|
[POWERPC] Make current preempt-safe
Repeated -j20 kernel builds on a G5 Quad running an SMP PREEMPT kernel
would often collapse within a day, some exec failing with "Bad address".
In each case examined, load_elf_binary was doing a kernel_read, but
generic_file_aio_read's access_ok saw current->thread.fs.seg as USER_DS
instead of KERNEL_DS.
objdump of filemap.o shows gcc 4.1.0 emitting "mr r5,r13 ... ld r9,416(r5)"
here for get_paca()->__current, instead of the expected and much more usual
"ld r9,416(r13)"; I've seen other gcc4s do the same, but perhaps not gcc3s.
So, if the task is preempted and rescheduled on a different cpu in between
the mr and the ld, r5 will be looking at a different paca_struct from the
one it's now on, pick up the wrong __current, and perhaps the wrong seg.
Presumably much worse could happen elsewhere, though that split is rare.
Other architectures appear to be safe (x86_64's read_pda is more limiting
than get_paca), but ppc64 needs to force "current" into one instruction.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-11-01 02:39:31 +08:00
|
|
|
static inline struct task_struct *get_current(void)
|
|
|
|
{
|
|
|
|
struct task_struct *task;
|
|
|
|
|
powerpc/64: allow compiler to cache 'current'
current may be cached by the compiler, so remove the volatile asm
restriction. This results in better generated code, as well as being
smaller and fewer dependent loads, it can avoid store-hit-load flushes
like this one that shows up in irq_exit():
preempt_count_sub(HARDIRQ_OFFSET);
if (!in_interrupt() && ...)
Which ends up as:
((struct thread_info *)current)->preempt_count -= HARDIRQ_OFFSET;
if (((struct thread_info *)current)->preempt_count ...
Evaluating current twice presently means it has to be loaded twice, and
here gcc happens to pick a different register each time, then
preempt_count is accessed via that base register:
1058: ld r10,2392(r13) <-- current
105c: lwz r9,0(r10) <-- preempt_count
1060: addis r9,r9,-1
1064: stw r9,0(r10) <-- preempt_count
1068: ld r9,2392(r13) <-- current
106c: lwz r9,0(r9) <-- preempt_count
1070: rlwinm. r9,r9,0,11,23
1074: bne 1090 <irq_exit+0x60>
This can frustrate store-hit-load detection heuristics and cause
flushes. Allowing the compiler to cache current in a reigster with this
patch results in the same base register being used for all accesses,
which is more likely to be detected as an alias:
1058: ld r31,2392(r13)
...
1070: lwz r9,0(r31)
1074: addis r9,r9,-1
1078: stw r9,0(r31)
107c: lwz r9,0(r31)
1080: rlwinm. r9,r9,0,11,23
1084: bne 10a0 <irq_exit+0x60>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20190612140317.24490-1-npiggin@gmail.com
2019-06-12 22:03:17 +08:00
|
|
|
/* get_current can be cached by the compiler, so no volatile */
|
|
|
|
asm ("ld %0,%1(13)"
|
[POWERPC] Make current preempt-safe
Repeated -j20 kernel builds on a G5 Quad running an SMP PREEMPT kernel
would often collapse within a day, some exec failing with "Bad address".
In each case examined, load_elf_binary was doing a kernel_read, but
generic_file_aio_read's access_ok saw current->thread.fs.seg as USER_DS
instead of KERNEL_DS.
objdump of filemap.o shows gcc 4.1.0 emitting "mr r5,r13 ... ld r9,416(r5)"
here for get_paca()->__current, instead of the expected and much more usual
"ld r9,416(r13)"; I've seen other gcc4s do the same, but perhaps not gcc3s.
So, if the task is preempted and rescheduled on a different cpu in between
the mr and the ld, r5 will be looking at a different paca_struct from the
one it's now on, pick up the wrong __current, and perhaps the wrong seg.
Presumably much worse could happen elsewhere, though that split is rare.
Other architectures appear to be safe (x86_64's read_pda is more limiting
than get_paca), but ppc64 needs to force "current" into one instruction.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-11-01 02:39:31 +08:00
|
|
|
: "=r" (task)
|
|
|
|
: "i" (offsetof(struct paca_struct, __current)));
|
|
|
|
|
|
|
|
return task;
|
|
|
|
}
|
|
|
|
#define current get_current()
|
2005-11-09 10:04:06 +08:00
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We keep `current' in r2 for speed.
|
|
|
|
*/
|
|
|
|
register struct task_struct *current asm ("r2");
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2005-12-17 05:43:46 +08:00
|
|
|
#endif /* __KERNEL__ */
|
2005-11-09 10:04:06 +08:00
|
|
|
#endif /* _ASM_POWERPC_CURRENT_H */
|