mirror of https://gitee.com/openkylin/linux.git
x86/entry: Add STACKLEAK erasing the kernel stack at the end of syscalls
The STACKLEAK feature (initially developed by PaX Team) has the following benefits: 1. Reduces the information that can be revealed through kernel stack leak bugs. The idea of erasing the thread stack at the end of syscalls is similar to CONFIG_PAGE_POISONING and memzero_explicit() in kernel crypto, which all comply with FDP_RIP.2 (Full Residual Information Protection) of the Common Criteria standard. 2. Blocks some uninitialized stack variable attacks (e.g. CVE-2017-17712, CVE-2010-2963). Bugs of that kind should eventually be eliminated by improving C compilers, which might take a long time. This commit introduces the code filling the used part of the kernel stack with a poison value before returning to userspace. The full STACKLEAK feature also contains the gcc plugin, which comes in a separate commit. The STACKLEAK feature is ported from grsecurity/PaX. More information at: https://grsecurity.net/ https://pax.grsecurity.net/ This code is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on our understanding of the code. Changes or omissions from the original code are ours and don't reflect the original grsecurity/PaX code. Performance impact: Hardware: Intel Core i7-4770, 16 GB RAM Test #1: building the Linux kernel on a single core: 0.91% slowdown Test #2: hackbench -s 4096 -l 2000 -g 15 -f 25 -P: 4.2% slowdown So the STACKLEAK description in Kconfig includes: "The tradeoff is the performance impact: on a single CPU system kernel compilation sees a 1% slowdown, other systems and workloads may vary and you are advised to test this feature on your expected workload before deploying it". Signed-off-by: Alexander Popov <alex.popov@linux.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com> Acked-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Kees Cook <keescook@chromium.org>
This commit is contained in:
parent
57361846b5
commit
afaef01c00
|
@ -24,6 +24,7 @@ ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
|
||||||
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
|
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
|
||||||
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
|
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
|
||||||
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
|
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
|
||||||
|
STACKLEAK_POISON value in this last hole: ffffffffffff4111
|
||||||
|
|
||||||
Virtual memory map with 5 level page tables:
|
Virtual memory map with 5 level page tables:
|
||||||
|
|
||||||
|
@ -50,6 +51,7 @@ ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
|
||||||
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
|
[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
|
||||||
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
|
ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
|
||||||
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
|
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
|
||||||
|
STACKLEAK_POISON value in this last hole: ffffffffffff4111
|
||||||
|
|
||||||
Architecture defines a 64-bit virtual address. Implementations can support
|
Architecture defines a 64-bit virtual address. Implementations can support
|
||||||
less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
|
less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
|
||||||
|
|
|
@ -419,6 +419,13 @@ config SECCOMP_FILTER
|
||||||
|
|
||||||
See Documentation/userspace-api/seccomp_filter.rst for details.
|
See Documentation/userspace-api/seccomp_filter.rst for details.
|
||||||
|
|
||||||
|
config HAVE_ARCH_STACKLEAK
|
||||||
|
bool
|
||||||
|
help
|
||||||
|
An architecture should select this if it has the code which
|
||||||
|
fills the used part of the kernel stack with the STACKLEAK_POISON
|
||||||
|
value before returning from system calls.
|
||||||
|
|
||||||
config HAVE_STACKPROTECTOR
|
config HAVE_STACKPROTECTOR
|
||||||
bool
|
bool
|
||||||
help
|
help
|
||||||
|
|
|
@ -127,6 +127,7 @@ config X86
|
||||||
select HAVE_ARCH_PREL32_RELOCATIONS
|
select HAVE_ARCH_PREL32_RELOCATIONS
|
||||||
select HAVE_ARCH_SECCOMP_FILTER
|
select HAVE_ARCH_SECCOMP_FILTER
|
||||||
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
|
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
|
||||||
|
select HAVE_ARCH_STACKLEAK
|
||||||
select HAVE_ARCH_TRACEHOOK
|
select HAVE_ARCH_TRACEHOOK
|
||||||
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
|
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
|
||||||
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
|
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
|
||||||
|
|
|
@ -329,8 +329,22 @@ For 32-bit we have the following conventions - kernel is built with
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
 * STACKLEAK_ERASE_NOCLOBBER: call stackleak_erase() with the call bracketed
 * by PUSH_AND_CLEAR_REGS / POP_REGS. Expands to nothing unless
 * CONFIG_GCC_PLUGIN_STACKLEAK is defined.
 *
 * NOTE(review): PUSH_AND_CLEAR_REGS and POP_REGS are defined elsewhere; by
 * their names they save and restore the general-purpose registers so that
 * the caller's live registers survive the C call -- confirm against their
 * definitions. PUSH_AND_CLEAR_REGS also pushes onto the current stack, so
 * the call site must have room for that.
 */
.macro STACKLEAK_ERASE_NOCLOBBER
|
||||||
|
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
|
||||||
|
PUSH_AND_CLEAR_REGS
|
||||||
|
call stackleak_erase
|
||||||
|
POP_REGS
|
||||||
|
#endif
|
||||||
|
.endm
|
||||||
|
|
||||||
#endif /* CONFIG_X86_64 */
|
#endif /* CONFIG_X86_64 */
|
||||||
|
|
||||||
|
/*
 * STACKLEAK_ERASE: plain call to stackleak_erase(). Expands to nothing
 * unless CONFIG_GCC_PLUGIN_STACKLEAK is defined.
 *
 * No registers are saved or restored around the call, so this is only
 * suitable at call sites that do not need register contents preserved
 * across a C function call (use STACKLEAK_ERASE_NOCLOBBER otherwise).
 */
.macro STACKLEAK_ERASE
|
||||||
|
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
|
||||||
|
call stackleak_erase
|
||||||
|
#endif
|
||||||
|
.endm
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This does 'call enter_from_user_mode' unless we can avoid it based on
|
* This does 'call enter_from_user_mode' unless we can avoid it based on
|
||||||
* kernel config or using the static jump infrastructure.
|
* kernel config or using the static jump infrastructure.
|
||||||
|
|
|
@ -46,6 +46,8 @@
|
||||||
#include <asm/frame.h>
|
#include <asm/frame.h>
|
||||||
#include <asm/nospec-branch.h>
|
#include <asm/nospec-branch.h>
|
||||||
|
|
||||||
|
#include "calling.h"
|
||||||
|
|
||||||
.section .entry.text, "ax"
|
.section .entry.text, "ax"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -711,6 +713,7 @@ ENTRY(ret_from_fork)
|
||||||
/* When we fork, we trace the syscall return in the child, too. */
|
/* When we fork, we trace the syscall return in the child, too. */
|
||||||
movl %esp, %eax
|
movl %esp, %eax
|
||||||
call syscall_return_slowpath
|
call syscall_return_slowpath
|
||||||
|
STACKLEAK_ERASE
|
||||||
jmp restore_all
|
jmp restore_all
|
||||||
|
|
||||||
/* kernel thread */
|
/* kernel thread */
|
||||||
|
@ -885,6 +888,8 @@ ENTRY(entry_SYSENTER_32)
|
||||||
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
|
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
|
||||||
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
|
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
|
||||||
|
|
||||||
|
STACKLEAK_ERASE
|
||||||
|
|
||||||
/* Opportunistic SYSEXIT */
|
/* Opportunistic SYSEXIT */
|
||||||
TRACE_IRQS_ON /* User mode traces as IRQs on. */
|
TRACE_IRQS_ON /* User mode traces as IRQs on. */
|
||||||
|
|
||||||
|
@ -996,6 +1001,8 @@ ENTRY(entry_INT80_32)
|
||||||
call do_int80_syscall_32
|
call do_int80_syscall_32
|
||||||
.Lsyscall_32_done:
|
.Lsyscall_32_done:
|
||||||
|
|
||||||
|
STACKLEAK_ERASE
|
||||||
|
|
||||||
restore_all:
|
restore_all:
|
||||||
TRACE_IRQS_IRET
|
TRACE_IRQS_IRET
|
||||||
SWITCH_TO_ENTRY_STACK
|
SWITCH_TO_ENTRY_STACK
|
||||||
|
|
|
@ -329,6 +329,8 @@ syscall_return_via_sysret:
|
||||||
* We are on the trampoline stack. All regs except RDI are live.
|
* We are on the trampoline stack. All regs except RDI are live.
|
||||||
* We can do future final exit work right here.
|
* We can do future final exit work right here.
|
||||||
*/
|
*/
|
||||||
|
STACKLEAK_ERASE_NOCLOBBER
|
||||||
|
|
||||||
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
|
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
|
||||||
|
|
||||||
popq %rdi
|
popq %rdi
|
||||||
|
@ -688,6 +690,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
|
||||||
* We are on the trampoline stack. All regs except RDI are live.
|
* We are on the trampoline stack. All regs except RDI are live.
|
||||||
* We can do future final exit work right here.
|
* We can do future final exit work right here.
|
||||||
*/
|
*/
|
||||||
|
STACKLEAK_ERASE_NOCLOBBER
|
||||||
|
|
||||||
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
|
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
|
||||||
|
|
||||||
|
|
|
@ -261,6 +261,11 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
|
||||||
|
|
||||||
/* Opportunistic SYSRET */
|
/* Opportunistic SYSRET */
|
||||||
sysret32_from_system_call:
|
sysret32_from_system_call:
|
||||||
|
/*
|
||||||
|
* We are not going to return to userspace from the trampoline
|
||||||
|
* stack. So let's erase the thread stack right now.
|
||||||
|
*/
|
||||||
|
STACKLEAK_ERASE
|
||||||
TRACE_IRQS_ON /* User mode traces as IRQs on. */
|
TRACE_IRQS_ON /* User mode traces as IRQs on. */
|
||||||
movq RBX(%rsp), %rbx /* pt_regs->rbx */
|
movq RBX(%rsp), %rbx /* pt_regs->rbx */
|
||||||
movq RBP(%rsp), %rbp /* pt_regs->rbp */
|
movq RBP(%rsp), %rbp /* pt_regs->rbp */
|
||||||
|
|
|
@ -1192,6 +1192,10 @@ struct task_struct {
|
||||||
void *security;
|
void *security;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
|
||||||
|
unsigned long lowest_stack;
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* New fields for task_struct should be added above here, so that
|
* New fields for task_struct should be added above here, so that
|
||||||
* they are included in the randomized portion of task_struct.
|
* they are included in the randomized portion of task_struct.
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
#ifndef _LINUX_STACKLEAK_H
|
||||||
|
#define _LINUX_STACKLEAK_H
|
||||||
|
|
||||||
|
#include <linux/sched.h>
|
||||||
|
#include <linux/sched/task_stack.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check that the poison value points to the unused hole in the
|
||||||
|
* virtual memory map for your platform.
|
||||||
|
*/
|
||||||
|
#define STACKLEAK_POISON -0xBEEF
|
||||||
|
#define STACKLEAK_SEARCH_DEPTH 128
|
||||||
|
|
||||||
|
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
|
||||||
|
#include <asm/stacktrace.h>
|
||||||
|
|
||||||
|
/*
 * stackleak_task_init() - set the initial 'lowest_stack' mark of task @t.
 *
 * The mark is placed one 'unsigned long' above the address returned by
 * end_of_stack(t), i.e. just past the single word at the stack end.
 * NOTE(review): presumably that word is the one reserved for the
 * stack-end check and must never be poisoned -- confirm against
 * end_of_stack() and CONFIG_SCHED_STACK_END_CHECK.
 */
static inline void stackleak_task_init(struct task_struct *t)
|
||||||
|
{
|
||||||
|
t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long);
|
||||||
|
}
|
||||||
|
#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
|
||||||
|
/* Feature disabled: 'lowest_stack' does not exist, so there is nothing to set. */
static inline void stackleak_task_init(struct task_struct *t) { }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
|
@ -117,6 +117,10 @@ obj-$(CONFIG_HAS_IOMEM) += iomem.o
|
||||||
obj-$(CONFIG_ZONE_DEVICE) += memremap.o
|
obj-$(CONFIG_ZONE_DEVICE) += memremap.o
|
||||||
obj-$(CONFIG_RSEQ) += rseq.o
|
obj-$(CONFIG_RSEQ) += rseq.o
|
||||||
|
|
||||||
|
obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
|
||||||
|
KASAN_SANITIZE_stackleak.o := n
|
||||||
|
KCOV_INSTRUMENT_stackleak.o := n
|
||||||
|
|
||||||
$(obj)/configs.o: $(obj)/config_data.h
|
$(obj)/configs.o: $(obj)/config_data.h
|
||||||
|
|
||||||
targets += config_data.gz
|
targets += config_data.gz
|
||||||
|
|
|
@ -91,6 +91,7 @@
|
||||||
#include <linux/kcov.h>
|
#include <linux/kcov.h>
|
||||||
#include <linux/livepatch.h>
|
#include <linux/livepatch.h>
|
||||||
#include <linux/thread_info.h>
|
#include <linux/thread_info.h>
|
||||||
|
#include <linux/stackleak.h>
|
||||||
|
|
||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
#include <asm/pgalloc.h>
|
#include <asm/pgalloc.h>
|
||||||
|
@ -1880,6 +1881,8 @@ static __latent_entropy struct task_struct *copy_process(
|
||||||
if (retval)
|
if (retval)
|
||||||
goto bad_fork_cleanup_io;
|
goto bad_fork_cleanup_io;
|
||||||
|
|
||||||
|
stackleak_task_init(p);
|
||||||
|
|
||||||
if (pid != &init_struct_pid) {
|
if (pid != &init_struct_pid) {
|
||||||
pid = alloc_pid(p->nsproxy->pid_ns_for_children);
|
pid = alloc_pid(p->nsproxy->pid_ns_for_children);
|
||||||
if (IS_ERR(pid)) {
|
if (IS_ERR(pid)) {
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
/*
|
||||||
|
* This code fills the used part of the kernel stack with a poison value
|
||||||
|
* before returning to userspace. It's part of the STACKLEAK feature
|
||||||
|
* ported from grsecurity/PaX.
|
||||||
|
*
|
||||||
|
* Author: Alexander Popov <alex.popov@linux.com>
|
||||||
|
*
|
||||||
|
* STACKLEAK reduces the information which kernel stack leak bugs can
|
||||||
|
* reveal and blocks some uninitialized stack variable attacks.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/stackleak.h>
|
||||||
|
|
||||||
|
/*
 * stackleak_erase() - overwrite the used part of the current task's kernel
 * stack with STACKLEAK_POISON.
 *
 * Steps, in order:
 *  1. Start at current->lowest_stack (falling back to the stack end if that
 *     value is not within THREAD_SIZE above end_of_stack(current)).
 *  2. Walk downwards one 'unsigned long' at a time until more than
 *     STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long) consecutive poison
 *     words have been seen, or the stack end is reached.
 *  3. Write poison upwards from that point to the current stack pointer
 *     (when running on the thread stack) or to the top of the thread stack
 *     (otherwise).
 *  4. Reset current->lowest_stack for the next syscall.
 *
 * Takes no arguments and returns nothing; it operates on 'current' only.
 */
asmlinkage void stackleak_erase(void)
|
||||||
|
{
|
||||||
|
/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
|
||||||
|
unsigned long kstack_ptr = current->lowest_stack;
|
||||||
|
unsigned long boundary = (unsigned long)end_of_stack(current);
|
||||||
|
unsigned int poison_count = 0;
|
||||||
|
/* Search depth expressed in stack words rather than bytes */
const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
|
||||||
|
|
||||||
|
/* Check that 'lowest_stack' value is sane */
|
||||||
|
/*
 * Both operands are unsigned, so a 'kstack_ptr' below 'boundary' wraps to
 * a huge difference and is rejected by the same comparison.
 */
if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
|
||||||
|
kstack_ptr = boundary;
|
||||||
|
|
||||||
|
/* Search for the poison value in the kernel stack */
|
||||||
|
/*
 * The run counter restarts on any non-poison word; the loop ends once
 * poison_count exceeds 'depth' or 'kstack_ptr' reaches 'boundary'.
 */
while (kstack_ptr > boundary && poison_count <= depth) {
|
||||||
|
if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
|
||||||
|
poison_count++;
|
||||||
|
else
|
||||||
|
poison_count = 0;
|
||||||
|
|
||||||
|
kstack_ptr -= sizeof(unsigned long);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* One 'long int' at the bottom of the thread stack is reserved and
|
||||||
|
* should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).
|
||||||
|
*/
|
||||||
|
if (kstack_ptr == boundary)
|
||||||
|
kstack_ptr += sizeof(unsigned long);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now write the poison value to the kernel stack. Start from
|
||||||
|
* 'kstack_ptr' and move up till the new 'boundary'. We assume that
|
||||||
|
* the stack pointer doesn't change when we write poison.
|
||||||
|
*/
|
||||||
|
/* 'boundary' is reused from here on as the upper limit of the erase */
if (on_thread_stack())
|
||||||
|
boundary = current_stack_pointer;
|
||||||
|
else
|
||||||
|
boundary = current_top_of_stack();
|
||||||
|
|
||||||
|
while (kstack_ptr < boundary) {
|
||||||
|
*(unsigned long *)kstack_ptr = STACKLEAK_POISON;
|
||||||
|
kstack_ptr += sizeof(unsigned long);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Reset the 'lowest_stack' value for the next syscall */
|
||||||
|
/* i.e. THREAD_SIZE/64 bytes below the top of the current thread stack */
current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
|
||||||
|
}
|
||||||
|
|
|
@ -139,4 +139,23 @@ config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
|
||||||
in structures. This reduces the performance hit of RANDSTRUCT
|
in structures. This reduces the performance hit of RANDSTRUCT
|
||||||
at the cost of weakened randomization.
|
at the cost of weakened randomization.
|
||||||
|
|
||||||
|
config GCC_PLUGIN_STACKLEAK
|
||||||
|
bool "Erase the kernel stack before returning from syscalls"
|
||||||
|
depends on GCC_PLUGINS
|
||||||
|
depends on HAVE_ARCH_STACKLEAK
|
||||||
|
help
|
||||||
|
This option makes the kernel erase the kernel stack before
|
||||||
|
returning from system calls. That reduces the information which
|
||||||
|
kernel stack leak bugs can reveal and blocks some uninitialized
|
||||||
|
stack variable attacks.
|
||||||
|
|
||||||
|
The tradeoff is the performance impact: on a single CPU system kernel
|
||||||
|
compilation sees a 1% slowdown, other systems and workloads may vary
|
||||||
|
and you are advised to test this feature on your expected workload
|
||||||
|
before deploying it.
|
||||||
|
|
||||||
|
This plugin was ported from grsecurity/PaX. More information at:
|
||||||
|
* https://grsecurity.net/
|
||||||
|
* https://pax.grsecurity.net/
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
Loading…
Reference in New Issue