linux/arch/x86/include/asm/syscall_wrapper.h

71 lines
2.5 KiB
C
Raw Normal View History

syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems: Each syscall defines a stub which takes struct pt_regs as its only argument. It decodes just those parameters it needs, e.g: asmlinkage long sys_xyzzy(const struct pt_regs *regs) { return SyS_xyzzy(regs->di, regs->si, regs->dx); } This approach avoids leaking random user-provided register content down the call chain. For example, for sys_recv() which is a 4-parameter syscall, the assembly now is (in slightly reordered fashion): <sys_recv>: callq <__fentry__> /* decode regs->di, ->si, ->dx and ->r10 */ mov 0x70(%rdi),%rdi mov 0x68(%rdi),%rsi mov 0x60(%rdi),%rdx mov 0x38(%rdi),%rcx [ SyS_recv() is automatically inlined by the compiler, as it is not [yet] used anywhere else ] /* clear %r9 and %r8, the 5th and 6th args */ xor %r9d,%r9d xor %r8d,%r8d /* do the actual work */ callq __sys_recvfrom /* cleanup and return */ cltq retq The only valid place in an x86-64 kernel which rightfully calls a syscall function on its own -- vsyscall -- needs to be modified to pass struct pt_regs onwards as well. To keep the syscall table generation working independent of SYSCALL_PTREGS being enabled, the stubs are named the same as the "original" syscall stubs, i.e. sys_*(). This patch is based on an original proof-of-concept | From: Linus Torvalds <torvalds@linux-foundation.org> | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> and was split up and heavily modified by me, in particular to base it on ARCH_HAS_SYSCALL_WRAPPER, to limit it to 64-bit-only for the time being, and to update the vsyscall to the new calling convention. Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20180405095307.3730-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-04-05 17:53:02 +08:00
/* SPDX-License-Identifier: GPL-2.0 */
/*
* syscall_wrapper.h - x86 specific wrappers to syscall definitions
*/
#ifndef _ASM_X86_SYSCALL_WRAPPER_H
#define _ASM_X86_SYSCALL_WRAPPER_H
/*
* Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes
* struct pt_regs *regs as the only argument of the syscall stub named
* sys_*(). It decodes just the registers it needs and passes them on to
* the SyS_*() wrapper and then to the SYSC_*() function doing the actual job.
* These wrappers and functions are inlined, meaning that the assembly looks
* as follows (slightly re-ordered):
*
* <sys_recv>: <-- syscall with 4 parameters
* callq <__fentry__>
*
* mov 0x70(%rdi),%rdi <-- decode regs->di
* mov 0x68(%rdi),%rsi <-- decode regs->si
* mov 0x60(%rdi),%rdx <-- decode regs->dx
* mov 0x38(%rdi),%rcx <-- decode regs->r10
*
* xor %r9d,%r9d <-- clear %r9
* xor %r8d,%r8d <-- clear %r8
*
* callq __sys_recvfrom <-- do the actual work in __sys_recvfrom()
* which takes 6 arguments
*
* cltq <-- extend return value to 64-bit
* retq <-- return
*
* This approach avoids leaking random user-provided register content down
* the call chain.
*
* As the generic SYSCALL_DEFINE0() macro does not decode any parameters for
* obvious reasons, and passing struct pt_regs *regs to it in %rdi does not
* hurt, there is no need to override it.
*/
#define __SYSCALL_DEFINEx(x, name, ...) \
asmlinkage long sys##name(const struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(sys##name, ERRNO); \
static long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
asmlinkage long sys##name(const struct pt_regs *regs) \
{ \
return SyS##name(__MAP(x,__SC_ARGS \
,,regs->di,,regs->si,,regs->dx \
,,regs->r10,,regs->r8,,regs->r9)); \
} \
static long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
__MAP(x,__SC_TEST,__VA_ARGS__); \
__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
return ret; \
} \
static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
/*
* For VSYSCALLS, we need to declare these three syscalls with the new
* pt_regs-based calling convention for in-kernel use.
*/
struct pt_regs;
asmlinkage long sys_getcpu(const struct pt_regs *regs); /* di,si,dx */
asmlinkage long sys_gettimeofday(const struct pt_regs *regs); /* di,si */
asmlinkage long sys_time(const struct pt_regs *regs); /* di */
#endif /* _ASM_X86_SYSCALL_WRAPPER_H */