x86/entry/common: Protect against instrumentation

Mark the various syscall entries with noinstr to protect them against
instrumentation and add the instrumentation_begin()/end() annotations to mark
the parts of the functions which are safe to call out into instrumentable code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200505134340.520277507@linutronix.de
This commit is contained in:
Thomas Gleixner 2020-03-10 14:46:27 +01:00
parent 1723be30e4
commit 8f159f1dfa
1 changed file with 89 additions and 44 deletions

View File

@ -41,15 +41,26 @@
#ifdef CONFIG_CONTEXT_TRACKING #ifdef CONFIG_CONTEXT_TRACKING
/* Called on entry from user mode with IRQs off. */ /* Called on entry from user mode with IRQs off. */
__visible inline noinstr void enter_from_user_mode(void) __visible noinstr void enter_from_user_mode(void)
{ {
CT_WARN_ON(ct_state() != CONTEXT_USER); enum ctx_state state = ct_state();
user_exit_irqoff(); user_exit_irqoff();
instrumentation_begin();
CT_WARN_ON(state != CONTEXT_USER);
instrumentation_end();
} }
#else #else
static inline void enter_from_user_mode(void) {} static inline void enter_from_user_mode(void) {}
#endif #endif
static noinstr void exit_to_user_mode(void)
{
user_enter_irqoff();
mds_user_clear_cpu_buffers();
}
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
{ {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
@ -179,8 +190,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
} }
} }
/* Called with IRQs disabled. */ static void __prepare_exit_to_usermode(struct pt_regs *regs)
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
{ {
struct thread_info *ti = current_thread_info(); struct thread_info *ti = current_thread_info();
u32 cached_flags; u32 cached_flags;
@ -219,10 +229,14 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
*/ */
ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
#endif #endif
}
user_enter_irqoff(); __visible noinstr void prepare_exit_to_usermode(struct pt_regs *regs)
{
mds_user_clear_cpu_buffers(); instrumentation_begin();
__prepare_exit_to_usermode(regs);
instrumentation_end();
exit_to_user_mode();
} }
#define SYSCALL_EXIT_WORK_FLAGS \ #define SYSCALL_EXIT_WORK_FLAGS \
@ -251,11 +265,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
tracehook_report_syscall_exit(regs, step); tracehook_report_syscall_exit(regs, step);
} }
/* static void __syscall_return_slowpath(struct pt_regs *regs)
* Called with IRQs on and fully valid regs. Returns with IRQs off in a
* state such that we can immediately switch to user mode.
*/
__visible inline void syscall_return_slowpath(struct pt_regs *regs)
{ {
struct thread_info *ti = current_thread_info(); struct thread_info *ti = current_thread_info();
u32 cached_flags = READ_ONCE(ti->flags); u32 cached_flags = READ_ONCE(ti->flags);
@ -276,15 +286,29 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
syscall_slow_exit_work(regs, cached_flags); syscall_slow_exit_work(regs, cached_flags);
local_irq_disable(); local_irq_disable();
prepare_exit_to_usermode(regs); __prepare_exit_to_usermode(regs);
}
/*
* Called with IRQs on and fully valid regs. Returns with IRQs off in a
* state such that we can immediately switch to user mode.
*/
__visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
{
instrumentation_begin();
__syscall_return_slowpath(regs);
instrumentation_end();
exit_to_user_mode();
} }
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{ {
struct thread_info *ti; struct thread_info *ti;
enter_from_user_mode(); enter_from_user_mode();
instrumentation_begin();
local_irq_enable(); local_irq_enable();
ti = current_thread_info(); ti = current_thread_info();
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
@ -301,8 +325,10 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
regs->ax = x32_sys_call_table[nr](regs); regs->ax = x32_sys_call_table[nr](regs);
#endif #endif
} }
__syscall_return_slowpath(regs);
syscall_return_slowpath(regs); instrumentation_end();
exit_to_user_mode();
} }
#endif #endif
@ -313,7 +339,7 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
* extremely hot in workloads that use it, and it's usually called from * extremely hot in workloads that use it, and it's usually called from
* do_fast_syscall_32, so forcibly inline it to improve performance. * do_fast_syscall_32, so forcibly inline it to improve performance.
*/ */
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) static void do_syscall_32_irqs_on(struct pt_regs *regs)
{ {
struct thread_info *ti = current_thread_info(); struct thread_info *ti = current_thread_info();
unsigned int nr = (unsigned int)regs->orig_ax; unsigned int nr = (unsigned int)regs->orig_ax;
@ -337,27 +363,62 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
regs->ax = ia32_sys_call_table[nr](regs); regs->ax = ia32_sys_call_table[nr](regs);
} }
syscall_return_slowpath(regs); __syscall_return_slowpath(regs);
} }
/* Handles int $0x80 */ /* Handles int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs) __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{ {
enter_from_user_mode(); enter_from_user_mode();
instrumentation_begin();
local_irq_enable(); local_irq_enable();
do_syscall_32_irqs_on(regs); do_syscall_32_irqs_on(regs);
instrumentation_end();
exit_to_user_mode();
}
static bool __do_fast_syscall_32(struct pt_regs *regs)
{
int res;
/* Fetch EBP from where the vDSO stashed it. */
if (IS_ENABLED(CONFIG_X86_64)) {
/*
* Micro-optimization: the pointer we're following is
* explicitly 32 bits, so it can't be out of range.
*/
res = __get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp);
} else {
res = get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp);
}
if (res) {
/* User code screwed up. */
regs->ax = -EFAULT;
local_irq_disable();
__prepare_exit_to_usermode(regs);
return false;
}
/* Now this is just like a normal syscall. */
do_syscall_32_irqs_on(regs);
return true;
} }
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs) __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
{ {
/* /*
* Called using the internal vDSO SYSENTER/SYSCALL32 calling * Called using the internal vDSO SYSENTER/SYSCALL32 calling
* convention. Adjust regs so it looks like we entered using int80. * convention. Adjust regs so it looks like we entered using int80.
*/ */
unsigned long landing_pad = (unsigned long)current->mm->context.vdso + unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
vdso_image_32.sym_int80_landing_pad; vdso_image_32.sym_int80_landing_pad;
bool success;
/* /*
* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
@ -367,33 +428,17 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
regs->ip = landing_pad; regs->ip = landing_pad;
enter_from_user_mode(); enter_from_user_mode();
instrumentation_begin();
local_irq_enable(); local_irq_enable();
success = __do_fast_syscall_32(regs);
/* Fetch EBP from where the vDSO stashed it. */ instrumentation_end();
if ( exit_to_user_mode();
#ifdef CONFIG_X86_64
/*
* Micro-optimization: the pointer we're following is explicitly
* 32 bits, so it can't be out of range.
*/
__get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#else
get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#endif
) {
/* User code screwed up. */ /* If it failed, keep it simple: use IRET. */
local_irq_disable(); if (!success)
regs->ax = -EFAULT; return 0;
prepare_exit_to_usermode(regs);
return 0; /* Keep it simple: use IRET. */
}
/* Now this is just like a normal syscall. */
do_syscall_32_irqs_on(regs);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* /*