sched/x86: Rewrite the switch_to() code
Move the low-level context switch code to an out-of-line asm stub instead of
using complex inline asm. This allows constructing a new stack frame for the
child process to make it seamlessly flow to ret_from_fork without an extra
test and branch in __switch_to(). It also improves code generation for
__schedule() by using the C calling convention instead of clobbering all
registers.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1471106302-10159-5-git-send-email-brgerst@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 0100301bfd
parent 7b32aeadbc
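The mechanism is easiest to see in isolation. Below is a stand-alone, user-space
sketch of the same idea (hypothetical names, x86-64 SysV ABI on Linux assumed;
this is an illustration, not kernel code): save the callee-saved registers, swap
stack pointers, restore, and return. A hand-built "inactive frame" on a fresh
stack makes the very first switch "return" into a function that has never run,
which is exactly how the child flows into ret_from_fork.

/* tiny_switch.c — user-space analogue of __switch_to_asm (x86-64 SysV, Linux).
 * All names here are hypothetical. Build: gcc tiny_switch.c && ./a.out */
#include <stdio.h>
#include <stdint.h>

struct ctx { void *sp; };			/* analogue of thread.sp */

static struct ctx main_ctx, child_ctx;

void tiny_switch(struct ctx *prev, struct ctx *next);
__asm__(
	".globl tiny_switch\n"
	"tiny_switch:\n\t"
	"pushq %rbp\n\t"			/* save callee-saved registers */
	"pushq %rbx\n\t"
	"pushq %r12\n\t"
	"pushq %r13\n\t"
	"pushq %r14\n\t"
	"pushq %r15\n\t"
	"movq %rsp, (%rdi)\n\t"			/* prev->sp = %rsp */
	"movq (%rsi), %rsp\n\t"			/* %rsp = next->sp */
	"popq %r15\n\t"				/* restore callee-saved registers */
	"popq %r14\n\t"
	"popq %r13\n\t"
	"popq %r12\n\t"
	"popq %rbx\n\t"
	"popq %rbp\n\t"
	"ret\n");				/* 'returns' on the new stack */

static void child(void)
{
	puts("child: running on a hand-built frame");
	tiny_switch(&child_ctx, &main_ctx);	/* never comes back here */
}

int main(void)
{
	static uint64_t stack[1024] __attribute__((aligned(16)));
	uint64_t *sp = &stack[1024];

	/* Build the equivalent of an inactive_task_frame by hand: six zeroed
	 * register slots below a return address, so the first tiny_switch()
	 * to child_ctx pops the zeros and 'returns' into child(). */
	*--sp = 0;			/* padding: keeps %rsp ABI-aligned at entry */
	*--sp = (uint64_t)child;	/* ret_addr */
	*--sp = 0;			/* rbp */
	*--sp = 0;			/* rbx */
	*--sp = 0;			/* r12 */
	*--sp = 0;			/* r13 */
	*--sp = 0;			/* r14 */
	*--sp = 0;			/* r15 */
	child_ctx.sp = sp;

	tiny_switch(&main_ctx, &child_ctx);
	puts("main: back from child");
	return 0;
}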
arch/x86/entry/entry_32.S
@@ -204,6 +204,43 @@
 	POP_GS_EX
 .endm
 
+/*
+ * %eax: prev task
+ * %edx: next task
+ */
+ENTRY(__switch_to_asm)
+	/*
+	 * Save callee-saved registers
+	 * This must match the order in struct inactive_task_frame
+	 */
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+
+	/* switch stack */
+	movl	%esp, TASK_threadsp(%eax)
+	movl	TASK_threadsp(%edx), %esp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+	movl	TASK_stack_canary(%edx), %ebx
+	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+#endif
+
+	/* restore callee-saved registers */
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+
+	jmp	__switch_to
+END(__switch_to_asm)
+
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * eax: prev task we switched from
+ */
 ENTRY(ret_from_fork)
 	pushl	%eax
 	call	schedule_tail
arch/x86/entry/entry_64.S
@@ -367,14 +367,49 @@ END(ptregs_\func)
 #define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
 #include <asm/syscalls_64.h>
 
+/*
+ * %rdi: prev task
+ * %rsi: next task
+ */
+ENTRY(__switch_to_asm)
+	/*
+	 * Save callee-saved registers
+	 * This must match the order in inactive_task_frame
+	 */
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+
+	/* switch stack */
+	movq	%rsp, TASK_threadsp(%rdi)
+	movq	TASK_threadsp(%rsi), %rsp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+	movq	TASK_stack_canary(%rsi), %rbx
+	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+#endif
+
+	/* restore callee-saved registers */
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+
+	jmp	__switch_to
+END(__switch_to_asm)
+
 /*
  * A newly forked process directly context switches into this address.
  *
- * rdi: prev task we switched from
+ * rax: prev task we switched from
  */
 ENTRY(ret_from_fork)
-	LOCK ; btr $TIF_FORK, TI_flags(%r8)
-
+	movq	%rax, %rdi
 	call	schedule_tail			/* rdi: 'prev' task parameter */
 
 	testb	$3, CS(%rsp)			/* from kernel_thread? */
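The "must match the order" comment above is load-bearing: the six pushq
instructions write downward in memory, so the register pushed last has to be the
first member of struct inactive_task_frame. A compile-time sketch of that
invariant (the struct is the 64-bit layout from this patch's switch_to.h; the
asserts themselves are illustrative, not part of the patch):

#include <stddef.h>

struct inactive_task_frame {		/* 64-bit layout from this patch */
	unsigned long r15;		/* pushed last -> lowest address */
	unsigned long r14;
	unsigned long r13;
	unsigned long r12;
	unsigned long bx;
	unsigned long bp;
	unsigned long ret_addr;		/* return address of the call -> highest */
};

_Static_assert(offsetof(struct inactive_task_frame, r15) == 0,
	       "last pushq must land on the first member");
_Static_assert(offsetof(struct inactive_task_frame, ret_addr) ==
	       6 * sizeof(unsigned long),
	       "the return address sits above all saved registers");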
arch/x86/include/asm/processor.h
@@ -389,9 +389,6 @@ struct thread_struct {
 	unsigned short		fsindex;
 	unsigned short		gsindex;
 #endif
-#ifdef CONFIG_X86_32
-	unsigned long		ip;
-#endif
#ifdef CONFIG_X86_64
 	unsigned long		fsbase;
 	unsigned long		gsbase;
arch/x86/include/asm/switch_to.h
@@ -2,6 +2,10 @@
 #define _ASM_X86_SWITCH_TO_H
 
 struct task_struct; /* one of the stranger aspects of C forward declarations */
+
+struct task_struct *__switch_to_asm(struct task_struct *prev,
+				    struct task_struct *next);
+
 __visible struct task_struct *__switch_to(struct task_struct *prev,
 					  struct task_struct *next);
 struct tss_struct;
@@ -32,131 +36,30 @@ static inline void prepare_switch_to(struct task_struct *prev,
 
+/* data that is pointed to by thread.sp */
+struct inactive_task_frame {
+#ifdef CONFIG_X86_64
+	unsigned long r15;
+	unsigned long r14;
+	unsigned long r13;
+	unsigned long r12;
+#else
+	unsigned long si;
+	unsigned long di;
+#endif
+	unsigned long bx;
+	unsigned long bp;
+	unsigned long ret_addr;
+};
+
-#ifdef CONFIG_X86_32
+struct fork_frame {
+	struct inactive_task_frame frame;
+	struct pt_regs regs;
+};
 
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary						\
-	"movl %P[task_canary](%[next]), %%ebx\n\t"		\
-	"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
-#define __switch_canary_oparam					\
-	, [stack_canary] "=m" (stack_canary.canary)
-#define __switch_canary_iparam					\
-	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else	/* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif	/* CC_STACKPROTECTOR */
-
-/*
- * Saving eflags is important. It switches not only IOPL between tasks,
- * it also protects other tasks from NT leaking through sysenter etc.
- */
 #define switch_to(prev, next, last)					\
 do {									\
-	/*								\
-	 * Context-switching clobbers all registers, so we clobber	\
-	 * them explicitly, via unused output variables.		\
-	 * (EAX and EBP is not listed because EBP is saved/restored	\
-	 * explicitly for wchan access and EAX is the return value of	\
-	 * __switch_to())						\
-	 */								\
-	unsigned long ebx, ecx, edx, esi, edi;				\
-									\
 	prepare_switch_to(prev, next);					\
 									\
-	asm volatile("pushl %%ebp\n\t"		/* save    EBP   */	\
-		     "movl %%esp,%[prev_sp]\n\t"	/* save    ESP   */ \
-		     "movl %[next_sp],%%esp\n\t"	/* restore ESP   */ \
-		     "movl $1f,%[prev_ip]\n\t"	/* save    EIP   */	\
-		     "pushl %[next_ip]\n\t"	/* restore EIP   */	\
-		     __switch_canary					\
-		     "jmp __switch_to\n"	/* regparm call  */	\
-		     "1:\t"						\
-		     "popl %%ebp\n\t"		/* restore EBP   */	\
-									\
-		     /* output parameters */				\
-		     : [prev_sp] "=m" (prev->thread.sp),		\
-		       [prev_ip] "=m" (prev->thread.ip),		\
-		       "=a" (last),					\
-									\
-		       /* clobbered output registers: */		\
-		       "=b" (ebx), "=c" (ecx), "=d" (edx),		\
-		       "=S" (esi), "=D" (edi)				\
-									\
-		       __switch_canary_oparam				\
-									\
-		       /* input parameters: */				\
-		     : [next_sp]  "m" (next->thread.sp),		\
-		       [next_ip]  "m" (next->thread.ip),		\
-									\
-		       /* regparm parameters for __switch_to(): */	\
-		       [prev]     "a" (prev),				\
-		       [next]     "d" (next)				\
-									\
-		       __switch_canary_iparam				\
-									\
-		     : /* reloaded segment registers */			\
-			"memory");					\
+	((last) = __switch_to_asm((prev), (next)));			\
 } while (0)
 
-#else /* CONFIG_X86_32 */
-
-/* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT    "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
-
-#define __EXTRA_CLOBBER  \
-	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
-	  "r12", "r13", "r14", "r15", "flags"
-
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary						\
-	"movq %P[task_canary](%%rsi),%%r8\n\t"			\
-	"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
-#define __switch_canary_oparam					\
-	, [gs_canary] "=m" (irq_stack_union.stack_canary)
-#define __switch_canary_iparam					\
-	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else	/* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif	/* CC_STACKPROTECTOR */
-
-/*
- * There is no need to save or restore flags, because flags are always
- * clean in kernel mode, with the possible exception of IOPL.  Kernel IOPL
- * has no effect.
- */
-#define switch_to(prev, next, last) \
-	prepare_switch_to(prev, next); \
-	\
-	asm volatile(SAVE_CONTEXT					\
-	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	\
-	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	\
-	     "call __switch_to\n\t"					\
-	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		\
-	     __switch_canary						\
-	     "movq %P[thread_info](%%rsi),%%r8\n\t"			\
-	     "movq %%rax,%%rdi\n\t"					\
-	     "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t"		\
-	     "jnz ret_from_fork\n\t"					\
-	     RESTORE_CONTEXT						\
-	     : "=a" (last)						\
-	       __switch_canary_oparam					\
-	     : [next] "S" (next), [prev] "D" (prev),			\
-	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
-	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	\
-	       [_tif_fork] "i" (_TIF_FORK),				\
-	       [thread_info] "i" (offsetof(struct task_struct, stack)), \
-	       [current_task] "m" (current_task)			\
-	       __switch_canary_iparam					\
-	     : "memory", "cc" __EXTRA_CLOBBER)
-
-#endif /* CONFIG_X86_32 */
-
 #endif /* _ASM_X86_SWITCH_TO_H */
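One subtlety survives the rewrite: switch_to() still has a 'last' output. A task
that calls __switch_to_asm() may not resume until much later, when an arbitrary
third task switches back to it; at that moment %eax/%rax holds whatever the
__switch_to() invocation on the switching-in path returned, i.e. the task we are
actually resuming from. The C calling convention now carries this for free. A toy
model of just the calling-convention plumbing (hypothetical names; it does not
model preemption, so here 'last' trivially equals 'prev'):

#include <stdio.h>

struct task { const char *name; };

/* Stand-in for the asm stub. The C-visible contract is only that the task
 * we switched away from comes back in the return register. */
static struct task *fake_switch_to_asm(struct task *prev, struct task *next)
{
	printf("switching %s -> %s\n", prev->name, next->name);
	return prev;
}

#define switch_to(prev, next, last)			\
do {							\
	((last) = fake_switch_to_asm((prev), (next)));	\
} while (0)

int main(void)
{
	struct task a = { "A" }, b = { "B" }, *last;

	switch_to(&a, &b, last);
	printf("last = %s\n", last->name);	/* prints "last = A" */
	return 0;
}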
arch/x86/include/asm/thread_info.h
@@ -95,7 +95,6 @@ struct thread_info {
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
-#define TIF_FORK		18	/* ret_from_fork */
 #define TIF_NOHZ		19	/* in adaptive nohz mode */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
@@ -119,7 +118,6 @@ struct thread_info {
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
-#define _TIF_FORK		(1 << TIF_FORK)
 #define _TIF_NOHZ		(1 << TIF_NOHZ)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
arch/x86/kernel/asm-offsets.c
@@ -28,6 +28,12 @@
 #endif
 
 void common(void) {
 	BLANK();
+	OFFSET(TASK_threadsp, task_struct, thread.sp);
+#ifdef CONFIG_CC_STACKPROTECTOR
+	OFFSET(TASK_stack_canary, task_struct, stack_canary);
+#endif
+
+	BLANK();
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_status, thread_info, status);
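TASK_threadsp and TASK_stack_canary exist so the .S stubs can address C struct
fields by constant offset. Roughly how the asm-offsets machinery produces them
(paraphrased from include/linux/kbuild.h; consult the tree for the exact
definitions): the compiler folds each offsetof() into an immediate and emits it
into the assembly output as an "->SYM VAL" marker, which the build scripts turn
into "#define SYM VAL" lines in the generated asm-offsets.h.

/* Paraphrase of include/linux/kbuild.h (illustrative; check the tree) */
#define DEFINE(sym, val) \
	asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

#define BLANK() \
	asm volatile("\n.ascii \"->\"" : : )

#define OFFSET(sym, str, mem) \
	DEFINE(sym, offsetof(struct str, mem))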
arch/x86/kernel/asm-offsets_32.c
@@ -57,6 +57,11 @@ void foo(void)
 	/* Size of SYSENTER_stack */
 	DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+	BLANK();
+	OFFSET(stack_canary_offset, stack_canary, canary);
+#endif
+
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,11 @@ int main(void)
 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
 	BLANK();
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+	DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
+	BLANK();
+#endif
+
 	DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
 	DEFINE(NR_syscalls, sizeof(syscalls_64));
 
arch/x86/kernel/process_32.c
@@ -133,17 +133,20 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	unsigned long arg, struct task_struct *p, unsigned long tls)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
+	struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
+	struct inactive_task_frame *frame = &fork_frame->frame;
 	struct task_struct *tsk;
 	int err;
 
-	p->thread.sp = (unsigned long) childregs;
+	frame->bp = 0;
+	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.sp0 = (unsigned long) (childregs+1);
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
-		p->thread.ip = (unsigned long) ret_from_kernel_thread;
+		frame->ret_addr = (unsigned long) ret_from_kernel_thread;
 		task_user_gs(p) = __KERNEL_STACK_CANARY;
 		childregs->ds = __USER_DS;
 		childregs->es = __USER_DS;
@@ -161,7 +164,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	if (sp)
 		childregs->sp = sp;
 
-	p->thread.ip = (unsigned long) ret_from_fork;
+	frame->ret_addr = (unsigned long) ret_from_fork;
 	task_user_gs(p) = get_user_gs(current_pt_regs());
 
 	p->thread.io_bitmap_ptr = NULL;
arch/x86/kernel/process_64.c
@@ -141,12 +141,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 {
 	int err;
 	struct pt_regs *childregs;
+	struct fork_frame *fork_frame;
+	struct inactive_task_frame *frame;
 	struct task_struct *me = current;
 
 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
 	childregs = task_pt_regs(p);
-	p->thread.sp = (unsigned long) childregs;
-	set_tsk_thread_flag(p, TIF_FORK);
+	fork_frame = container_of(childregs, struct fork_frame, regs);
+	frame = &fork_frame->frame;
+	frame->bp = 0;
+	frame->ret_addr = (unsigned long) ret_from_fork;
+	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
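Both copy_thread_tls() variants now rely on the same address arithmetic: the
child's pt_regs sit at the very top of its kernel stack, and container_of()
backs up by sizeof(struct inactive_task_frame), so thread.sp points at a frame
whose ret_addr sends the first context switch into ret_from_fork. A stand-alone
sketch of just that arithmetic (user-space stand-ins; the kernel uses
task_pt_regs() and THREAD_SIZE, and struct pt_regs is simplified here):

#include <stdio.h>
#include <stddef.h>

struct pt_regs { unsigned long reg[21]; };	/* stand-in for the real layout */

struct inactive_task_frame {
	unsigned long r15, r14, r13, r12, bx, bp, ret_addr;
};

struct fork_frame {
	struct inactive_task_frame frame;
	struct pt_regs regs;
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	static unsigned char stack[16384];	/* fake THREAD_SIZE region */

	/* task_pt_regs(p): pt_regs at the very top of the stack */
	struct pt_regs *childregs =
		(struct pt_regs *)(stack + sizeof(stack)) - 1;
	struct fork_frame *fork_frame =
		container_of(childregs, struct fork_frame, regs);

	printf("thread.sp would be %zu bytes below childregs\n",
	       (size_t)((char *)childregs - (char *)fork_frame));
	return 0;
}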
arch/x86/kernel/smpboot.c
@@ -942,7 +942,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
 	per_cpu(cpu_current_top_of_stack, cpu) =
 		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
 #else
-	clear_tsk_thread_flag(idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
 #endif
 }