Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Thomas Gleixner: "An unfortunately larger set of fixes, but a large portion is selftests: - Fix the missing clusterid initialization for x2apic cluster management which caused boot failures due to IPIs being sent to the wrong cluster - Drop TS_COMPAT when a 64bit executable is exec()'ed from a compat task - Wrap access to __supported_pte_mask in __startup_64() where clang compile fails due to a non PC relative access being generated. - Two fixes for 5 level paging fallout in the decompressor: - Handle GOT correctly for paging_prepare() and cleanup_trampoline() - Fix the page table handling in cleanup_trampoline() to avoid page table corruption. - Stop special casing protection key 0 as this is inconsistent with the manpage and also inconsistent with the allocation map handling. - Override the protection key when moving away from PROT_EXEC to prevent inaccessible memory. - Fix and update the protection key selftests to address breakage and to cover the above issue - Add a MOV SS self test" [ Part of the x86 fixes were in the earlier core pull due to dependencies ] * 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits) x86/mm: Drop TS_COMPAT on 64-bit exec() syscall x86/apic/x2apic: Initialize cluster ID properly x86/boot/compressed/64: Fix moving page table out of trampoline memory x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline() x86/pkeys: Do not special case protection key 0 x86/pkeys/selftests: Add a test for pkey 0 x86/pkeys/selftests: Save off 'prot' for allocations x86/pkeys/selftests: Fix pointer math x86/pkeys: Override pkey when moving away from PROT_EXEC x86/pkeys/selftests: Fix pkey exhaustion test off-by-one x86/pkeys/selftests: Add PROT_EXEC test x86/pkeys/selftests: Factor out "instruction page" x86/pkeys/selftests: Allow faults on unknown keys x86/pkeys/selftests: Avoid printf-in-signal deadlocks x86/pkeys/selftests: Remove dead debugging code, fix 
dprint_in_signal x86/pkeys/selftests: Stop using assert() x86/pkeys/selftests: Give better unexpected fault error messages x86/selftests: Add mov_to_ss test x86/mpx/selftests: Adjust the self-test to fresh distros that export the MPX ABI x86/pkeys/selftests: Adjust the self-test to fresh distros that export the pkeys ABI ...
This commit is contained in:
commit
8a6bd2f40e
|
@ -305,6 +305,25 @@ ENTRY(startup_64)
|
|||
/* Set up the stack */
|
||||
leaq boot_stack_end(%rbx), %rsp
|
||||
|
||||
/*
|
||||
* paging_prepare() and cleanup_trampoline() below can have GOT
|
||||
* references. Adjust the table with address we are running at.
|
||||
*
|
||||
* Zero RAX for adjust_got: the GOT was not adjusted before;
|
||||
* there's no adjustment to undo.
|
||||
*/
|
||||
xorq %rax, %rax
|
||||
|
||||
/*
|
||||
* Calculate the address the binary is loaded at and use it as
|
||||
* a GOT adjustment.
|
||||
*/
|
||||
call 1f
|
||||
1: popq %rdi
|
||||
subq $1b, %rdi
|
||||
|
||||
call adjust_got
|
||||
|
||||
/*
|
||||
* At this point we are in long mode with 4-level paging enabled,
|
||||
* but we might want to enable 5-level paging or vice versa.
|
||||
|
@ -370,10 +389,14 @@ trampoline_return:
|
|||
/*
|
||||
* cleanup_trampoline() would restore trampoline memory.
|
||||
*
|
||||
* RDI is address of the page table to use instead of page table
|
||||
* in trampoline memory (if required).
|
||||
*
|
||||
* RSI holds real mode data and needs to be preserved across
|
||||
* this function call.
|
||||
*/
|
||||
pushq %rsi
|
||||
leaq top_pgtable(%rbx), %rdi
|
||||
call cleanup_trampoline
|
||||
popq %rsi
|
||||
|
||||
|
@ -381,6 +404,21 @@ trampoline_return:
|
|||
pushq $0
|
||||
popfq
|
||||
|
||||
/*
|
||||
* Previously we've adjusted the GOT with address the binary was
|
||||
* loaded at. Now we need to re-adjust for relocation address.
|
||||
*
|
||||
* Calculate the address the binary is loaded at, so that we can
|
||||
* undo the previous GOT adjustment.
|
||||
*/
|
||||
call 1f
|
||||
1: popq %rax
|
||||
subq $1b, %rax
|
||||
|
||||
/* The new adjustment is the relocation address */
|
||||
movq %rbx, %rdi
|
||||
call adjust_got
|
||||
|
||||
/*
|
||||
* Copy the compressed kernel to the end of our buffer
|
||||
* where decompression in place becomes safe.
|
||||
|
@ -481,19 +519,6 @@ relocated:
|
|||
shrq $3, %rcx
|
||||
rep stosq
|
||||
|
||||
/*
|
||||
* Adjust our own GOT
|
||||
*/
|
||||
leaq _got(%rip), %rdx
|
||||
leaq _egot(%rip), %rcx
|
||||
1:
|
||||
cmpq %rcx, %rdx
|
||||
jae 2f
|
||||
addq %rbx, (%rdx)
|
||||
addq $8, %rdx
|
||||
jmp 1b
|
||||
2:
|
||||
|
||||
/*
|
||||
* Do the extraction, and jump to the new kernel..
|
||||
*/
|
||||
|
@ -512,6 +537,27 @@ relocated:
|
|||
*/
|
||||
jmp *%rax
|
||||
|
||||
/*
|
||||
* Adjust the global offset table
|
||||
*
|
||||
* RAX is the previous adjustment of the table to undo (use 0 if it's the
|
||||
* first time we touch GOT).
|
||||
* RDI is the new adjustment to apply.
|
||||
*/
|
||||
adjust_got:
|
||||
/* Walk through the GOT adding the address to the entries */
|
||||
leaq _got(%rip), %rdx
|
||||
leaq _egot(%rip), %rcx
|
||||
1:
|
||||
cmpq %rcx, %rdx
|
||||
jae 2f
|
||||
subq %rax, (%rdx) /* Undo previous adjustment */
|
||||
addq %rdi, (%rdx) /* Apply the new adjustment */
|
||||
addq $8, %rdx
|
||||
jmp 1b
|
||||
2:
|
||||
ret
|
||||
|
||||
.code32
|
||||
/*
|
||||
* This is the 32-bit trampoline that will be copied over to low memory.
|
||||
|
@ -649,3 +695,10 @@ boot_stack_end:
|
|||
.balign 4096
|
||||
pgtable:
|
||||
.fill BOOT_PGT_SIZE, 1, 0
|
||||
|
||||
/*
|
||||
* The page table is going to be used instead of page table in the trampoline
|
||||
* memory.
|
||||
*/
|
||||
top_pgtable:
|
||||
.fill PAGE_SIZE, 1, 0
|
||||
|
|
|
@ -22,14 +22,6 @@ struct paging_config {
|
|||
/* Buffer to preserve trampoline memory */
|
||||
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
|
||||
|
||||
/*
|
||||
* The page table is going to be used instead of page table in the trampoline
|
||||
* memory.
|
||||
*
|
||||
* It must not be in BSS as BSS is cleared after cleanup_trampoline().
|
||||
*/
|
||||
static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
|
||||
|
||||
/*
|
||||
* Trampoline address will be printed by extract_kernel() for debugging
|
||||
* purposes.
|
||||
|
@ -134,7 +126,7 @@ struct paging_config paging_prepare(void)
|
|||
return paging_config;
|
||||
}
|
||||
|
||||
void cleanup_trampoline(void)
|
||||
void cleanup_trampoline(void *pgtable)
|
||||
{
|
||||
void *trampoline_pgtable;
|
||||
|
||||
|
@ -145,8 +137,8 @@ void cleanup_trampoline(void)
|
|||
* if it's there.
|
||||
*/
|
||||
if ((void *)__native_read_cr3() == trampoline_pgtable) {
|
||||
memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
|
||||
native_write_cr3((unsigned long)top_pgtable);
|
||||
memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
|
||||
native_write_cr3((unsigned long)pgtable);
|
||||
}
|
||||
|
||||
/* Restore trampoline memory */
|
||||
|
|
|
@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk,
|
|||
|
||||
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
|
||||
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
|
||||
/* pkey 0 is the default and always allocated */
|
||||
/* pkey 0 is the default and allocated implicitly */
|
||||
mm->context.pkey_allocation_map = 0x1;
|
||||
/* -1 means unallocated or invalid */
|
||||
mm->context.execute_only_pkey = -1;
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
#ifndef _ASM_X86_PKEYS_H
|
||||
#define _ASM_X86_PKEYS_H
|
||||
|
||||
#define ARCH_DEFAULT_PKEY 0
|
||||
|
||||
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
|
||||
|
||||
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
||||
|
@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm);
|
|||
static inline int execute_only_pkey(struct mm_struct *mm)
|
||||
{
|
||||
if (!boot_cpu_has(X86_FEATURE_OSPKE))
|
||||
return 0;
|
||||
return ARCH_DEFAULT_PKEY;
|
||||
|
||||
return __execute_only_pkey(mm);
|
||||
}
|
||||
|
@ -49,13 +51,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
|
|||
{
|
||||
/*
|
||||
* "Allocated" pkeys are those that have been returned
|
||||
* from pkey_alloc(). pkey 0 is special, and never
|
||||
* returned from pkey_alloc().
|
||||
* from pkey_alloc() or pkey 0 which is allocated
|
||||
* implicitly when the mm is created.
|
||||
*/
|
||||
if (pkey <= 0)
|
||||
if (pkey < 0)
|
||||
return false;
|
||||
if (pkey >= arch_max_pkey())
|
||||
return false;
|
||||
/*
|
||||
* The exec-only pkey is set in the allocation map, but
|
||||
* is not available to any of the user interfaces like
|
||||
* mprotect_pkey().
|
||||
*/
|
||||
if (pkey == mm->context.execute_only_pkey)
|
||||
return false;
|
||||
|
||||
return mm_pkey_allocation_map(mm) & (1U << pkey);
|
||||
}
|
||||
|
||||
|
|
|
@ -116,6 +116,7 @@ static void init_x2apic_ldr(void)
|
|||
goto update;
|
||||
}
|
||||
cmsk = cluster_hotplug_mask;
|
||||
cmsk->clusterid = cluster;
|
||||
cluster_hotplug_mask = NULL;
|
||||
update:
|
||||
this_cpu_write(cluster_masks, cmsk);
|
||||
|
|
|
@ -104,6 +104,12 @@ static bool __head check_la57_support(unsigned long physaddr)
|
|||
}
|
||||
#endif
|
||||
|
||||
/* Code in __startup_64() can be relocated during execution, but the compiler
|
||||
* doesn't have to generate PC-relative relocations when accessing globals from
|
||||
* that function. Clang actually does not generate them, which leads to
|
||||
* boot-time crashes. To work around this problem, every global pointer must
|
||||
* be adjusted using fixup_pointer().
|
||||
*/
|
||||
unsigned long __head __startup_64(unsigned long physaddr,
|
||||
struct boot_params *bp)
|
||||
{
|
||||
|
@ -113,6 +119,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
|
|||
p4dval_t *p4d;
|
||||
pudval_t *pud;
|
||||
pmdval_t *pmd, pmd_entry;
|
||||
pteval_t *mask_ptr;
|
||||
bool la57;
|
||||
int i;
|
||||
unsigned int *next_pgt_ptr;
|
||||
|
@ -196,7 +203,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
|
|||
|
||||
pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
|
||||
/* Filter out unsupported __PAGE_KERNEL_* bits: */
|
||||
pmd_entry &= __supported_pte_mask;
|
||||
mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
|
||||
pmd_entry &= *mask_ptr;
|
||||
pmd_entry += sme_get_me_mask();
|
||||
pmd_entry += physaddr;
|
||||
|
||||
|
|
|
@ -542,6 +542,7 @@ void set_personality_64bit(void)
|
|||
clear_thread_flag(TIF_X32);
|
||||
/* Pretend that this comes from a 64bit execve */
|
||||
task_pt_regs(current)->orig_ax = __NR_execve;
|
||||
current_thread_info()->status &= ~TS_COMPAT;
|
||||
|
||||
/* Ensure the corresponding mm is not marked. */
|
||||
if (current->mm)
|
||||
|
|
|
@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
|
|||
*/
|
||||
if (pkey != -1)
|
||||
return pkey;
|
||||
/*
|
||||
* Look for a protection-key-drive execute-only mapping
|
||||
* which is now being given permissions that are not
|
||||
* execute-only. Move it back to the default pkey.
|
||||
*/
|
||||
if (vma_is_pkey_exec_only(vma) &&
|
||||
(prot & (PROT_READ|PROT_WRITE))) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The mapping is execute-only. Go try to get the
|
||||
* execute-only protection key. If we fail to do that,
|
||||
* fall through as if we do not have execute-only
|
||||
* support.
|
||||
* support in this mm.
|
||||
*/
|
||||
if (prot == PROT_EXEC) {
|
||||
pkey = execute_only_pkey(vma->vm_mm);
|
||||
if (pkey > 0)
|
||||
return pkey;
|
||||
} else if (vma_is_pkey_exec_only(vma)) {
|
||||
/*
|
||||
* Protections are *not* PROT_EXEC, but the mapping
|
||||
* is using the exec-only pkey. This mapping was
|
||||
* PROT_EXEC and will no longer be. Move back to
|
||||
* the default pkey.
|
||||
*/
|
||||
return ARCH_DEFAULT_PKEY;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a vanilla, non-pkey mprotect (or we failed to
|
||||
* setup execute-only), inherit the pkey from the VMA we
|
||||
|
|
|
@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
|
|||
|
||||
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
|
||||
check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
|
||||
protection_keys test_vdso test_vsyscall
|
||||
protection_keys test_vdso test_vsyscall mov_ss_trap
|
||||
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
|
||||
test_FCMOV test_FCOMI test_FISTTP \
|
||||
vdso_restorer
|
||||
|
|
|
@ -0,0 +1,285 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS
|
||||
*
|
||||
* This does MOV SS from a watchpointed address followed by various
|
||||
* types of kernel entries. A MOV SS that hits a watchpoint will queue
|
||||
* up a #DB trap but will not actually deliver that trap. The trap
|
||||
* will be delivered after the next instruction instead. The CPU's logic
|
||||
* seems to be:
|
||||
*
|
||||
* - Any fault: drop the pending #DB trap.
|
||||
* - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then
|
||||
* deliver #DB.
|
||||
* - ICEBP: enter the kernel but do not deliver the watchpoint trap
|
||||
* - breakpoint: only one #DB is delivered (phew!)
|
||||
*
|
||||
* There are plenty of ways for a kernel to handle this incorrectly. This
|
||||
* test tries to exercise all the cases.
|
||||
*
|
||||
* This should mostly cover CVE-2018-1087 and CVE-2018-8897.
|
||||
*/
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <sys/ptrace.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/user.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <err.h>
|
||||
#include <string.h>
|
||||
#include <setjmp.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#define X86_EFLAGS_RF (1UL << 16)
|
||||
|
||||
#if __x86_64__
|
||||
# define REG_IP REG_RIP
|
||||
#else
|
||||
# define REG_IP REG_EIP
|
||||
#endif
|
||||
|
||||
unsigned short ss;
|
||||
extern unsigned char breakpoint_insn[];
|
||||
sigjmp_buf jmpbuf;
|
||||
static unsigned char altstack_data[SIGSTKSZ];
|
||||
|
||||
static void enable_watchpoint(void)
|
||||
{
|
||||
pid_t parent = getpid();
|
||||
int status;
|
||||
|
||||
pid_t child = fork();
|
||||
if (child < 0)
|
||||
err(1, "fork");
|
||||
|
||||
if (child) {
|
||||
if (waitpid(child, &status, 0) != child)
|
||||
err(1, "waitpid for child");
|
||||
} else {
|
||||
unsigned long dr0, dr1, dr7;
|
||||
|
||||
dr0 = (unsigned long)&ss;
|
||||
dr1 = (unsigned long)breakpoint_insn;
|
||||
dr7 = ((1UL << 1) | /* G0 */
|
||||
(3UL << 16) | /* RW0 = read or write */
|
||||
(1UL << 18) | /* LEN0 = 2 bytes */
|
||||
(1UL << 3)); /* G1, RW1 = insn */
|
||||
|
||||
if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0)
|
||||
err(1, "PTRACE_ATTACH");
|
||||
|
||||
if (waitpid(parent, &status, 0) != parent)
|
||||
err(1, "waitpid for child");
|
||||
|
||||
if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0)
|
||||
err(1, "PTRACE_POKEUSER DR0");
|
||||
|
||||
if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0)
|
||||
err(1, "PTRACE_POKEUSER DR1");
|
||||
|
||||
if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0)
|
||||
err(1, "PTRACE_POKEUSER DR7");
|
||||
|
||||
printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7);
|
||||
|
||||
if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0)
|
||||
err(1, "PTRACE_DETACH");
|
||||
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
|
||||
int flags)
|
||||
{
|
||||
struct sigaction sa;
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.sa_sigaction = handler;
|
||||
sa.sa_flags = SA_SIGINFO | flags;
|
||||
sigemptyset(&sa.sa_mask);
|
||||
if (sigaction(sig, &sa, 0))
|
||||
err(1, "sigaction");
|
||||
}
|
||||
|
||||
/*
 * Printable signal names, indexed by signal number.  Entries for signals
 * not listed here are NULL.
 */
static char const * const signames[] = {
	[SIGSEGV] = "SIGSEGV",
	[SIGBUS] = "SIGBUS",
	[SIGTRAP] = "SIGTRAP",
	[SIGILL] = "SIGILL",
};
|
||||
|
||||
/*
 * SIGTRAP handler: print the instruction pointer saved in the signal
 * context and whether EFLAGS.RF is set there, then return normally.
 */
static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
{
	ucontext_t *uc = ctx_void;
	unsigned long ip = (unsigned long)uc->uc_mcontext.gregs[REG_IP];
	int rf = !!(uc->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF);

	printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n", ip, rf);
}
|
||||
|
||||
/*
 * Generic handler: print the name of the delivered signal and the saved
 * instruction pointer, then return normally from the handler.
 */
static void handle_and_return(int sig, siginfo_t *si, void *ctx_void)
{
	ucontext_t *uc = ctx_void;
	unsigned long ip = (unsigned long)uc->uc_mcontext.gregs[REG_IP];

	printf("\tGot %s with RIP=%lx\n", signames[sig], ip);
}
|
||||
|
||||
/*
 * Generic handler: print the name of the delivered signal and the saved
 * instruction pointer, then jump back to the sigsetjmp() point recorded in
 * 'jmpbuf' instead of returning to the interrupted code.
 */
static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
{
	ucontext_t *uc = ctx_void;
	unsigned long ip = (unsigned long)uc->uc_mcontext.gregs[REG_IP];

	printf("\tGot %s with RIP=%lx\n", signames[sig], ip);

	siglongjmp(jmpbuf, 1);
}
|
||||
|
||||
int main()
|
||||
{
|
||||
unsigned long nr;
|
||||
|
||||
asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss));
|
||||
printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss);
|
||||
|
||||
if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0)
|
||||
printf("\tPR_SET_PTRACER_ANY succeeded\n");
|
||||
|
||||
printf("\tSet up a watchpoint\n");
|
||||
sethandler(SIGTRAP, sigtrap, 0);
|
||||
enable_watchpoint();
|
||||
|
||||
printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n");
|
||||
asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss));
|
||||
|
||||
printf("[RUN]\tMOV SS; INT3\n");
|
||||
asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss));
|
||||
|
||||
printf("[RUN]\tMOV SS; INT 3\n");
|
||||
asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss));
|
||||
|
||||
printf("[RUN]\tMOV SS; CS CS INT3\n");
|
||||
asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss));
|
||||
|
||||
printf("[RUN]\tMOV SS; CSx14 INT3\n");
|
||||
asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss));
|
||||
|
||||
printf("[RUN]\tMOV SS; INT 4\n");
|
||||
sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
|
||||
asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss));
|
||||
|
||||
#ifdef __i386__
|
||||
printf("[RUN]\tMOV SS; INTO\n");
|
||||
sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
|
||||
nr = -1;
|
||||
asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into"
|
||||
: [tmp] "+r" (nr) : [ss] "m" (ss));
|
||||
#endif
|
||||
|
||||
if (sigsetjmp(jmpbuf, 1) == 0) {
|
||||
printf("[RUN]\tMOV SS; ICEBP\n");
|
||||
|
||||
/* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */
|
||||
sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
|
||||
|
||||
asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss));
|
||||
}
|
||||
|
||||
if (sigsetjmp(jmpbuf, 1) == 0) {
|
||||
printf("[RUN]\tMOV SS; CLI\n");
|
||||
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
|
||||
asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss));
|
||||
}
|
||||
|
||||
if (sigsetjmp(jmpbuf, 1) == 0) {
|
||||
printf("[RUN]\tMOV SS; #PF\n");
|
||||
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
|
||||
asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]"
|
||||
: [tmp] "=r" (nr) : [ss] "m" (ss));
|
||||
}
|
||||
|
||||
/*
|
||||
* INT $1: if #DB has DPL=3 and there isn't special handling,
|
||||
* then the kernel will die.
|
||||
*/
|
||||
if (sigsetjmp(jmpbuf, 1) == 0) {
|
||||
printf("[RUN]\tMOV SS; INT 1\n");
|
||||
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
|
||||
asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss));
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/*
|
||||
* In principle, we should test 32-bit SYSCALL as well, but
|
||||
* the calling convention is so unpredictable that it's
|
||||
* not obviously worth the effort.
|
||||
*/
|
||||
if (sigsetjmp(jmpbuf, 1) == 0) {
|
||||
printf("[RUN]\tMOV SS; SYSCALL\n");
|
||||
sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
|
||||
nr = SYS_getpid;
|
||||
/*
|
||||
* Toggle the high bit of RSP to make it noncanonical to
|
||||
* strengthen this test on non-SMAP systems.
|
||||
*/
|
||||
asm volatile ("btc $63, %%rsp\n\t"
|
||||
"mov %[ss], %%ss; syscall\n\t"
|
||||
"btc $63, %%rsp"
|
||||
: "+a" (nr) : [ss] "m" (ss)
|
||||
: "rcx"
|
||||
#ifdef __x86_64__
|
||||
, "r11"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
printf("[RUN]\tMOV SS; breakpointed NOP\n");
|
||||
asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss));
|
||||
|
||||
/*
|
||||
* Invoking SYSENTER directly breaks all the rules. Just handle
|
||||
* the SIGSEGV.
|
||||
*/
|
||||
if (sigsetjmp(jmpbuf, 1) == 0) {
|
||||
printf("[RUN]\tMOV SS; SYSENTER\n");
|
||||
stack_t stack = {
|
||||
.ss_sp = altstack_data,
|
||||
.ss_size = SIGSTKSZ,
|
||||
};
|
||||
if (sigaltstack(&stack, NULL) != 0)
|
||||
err(1, "sigaltstack");
|
||||
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
|
||||
nr = SYS_getpid;
|
||||
asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
|
||||
: [ss] "m" (ss) : "flags", "rcx"
|
||||
#ifdef __x86_64__
|
||||
, "r11"
|
||||
#endif
|
||||
);
|
||||
|
||||
/* We're unreachable here. SYSENTER forgets RIP. */
|
||||
}
|
||||
|
||||
if (sigsetjmp(jmpbuf, 1) == 0) {
|
||||
printf("[RUN]\tMOV SS; INT $0x80\n");
|
||||
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
|
||||
nr = 20; /* compat getpid */
|
||||
asm volatile ("mov %[ss], %%ss; int $0x80"
|
||||
: "+a" (nr) : [ss] "m" (ss)
|
||||
: "flags"
|
||||
#ifdef __x86_64__
|
||||
, "r8", "r9", "r10", "r11"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
printf("[OK]\tI aten't dead\n");
|
||||
return 0;
|
||||
}
|
|
@ -368,6 +368,11 @@ static int expected_bnd_index = -1;
|
|||
uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
|
||||
unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
|
||||
|
||||
/* Failed address bound checks: */
|
||||
#ifndef SEGV_BNDERR
|
||||
# define SEGV_BNDERR 3
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The kernel is supposed to provide some information about the bounds
|
||||
* exception in the siginfo. It should match what we have in the bounds
|
||||
|
@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext)
|
|||
br_count++;
|
||||
dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
|
||||
|
||||
#define SEGV_BNDERR 3 /* failed address bound checks */
|
||||
|
||||
dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
|
||||
status, ip, br_reason);
|
||||
dprintf2("si_signo: %d\n", si->si_signo);
|
||||
|
|
|
@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...)
|
|||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, format);
|
||||
if (!dprint_in_signal) {
|
||||
va_start(ap, format);
|
||||
vprintf(format, ap);
|
||||
va_end(ap);
|
||||
} else {
|
||||
int ret;
|
||||
int len = vsnprintf(dprint_in_signal_buffer,
|
||||
DPRINT_IN_SIGNAL_BUF_SIZE,
|
||||
format, ap);
|
||||
/*
|
||||
* len is amount that would have been printed,
|
||||
* but actual write is truncated at BUF_SIZE.
|
||||
* No printf() functions are signal-safe.
|
||||
* They deadlock easily. Write the format
|
||||
* string to get some output, even if
|
||||
* incomplete.
|
||||
*/
|
||||
if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
|
||||
len = DPRINT_IN_SIGNAL_BUF_SIZE;
|
||||
ret = write(1, dprint_in_signal_buffer, len);
|
||||
ret = write(1, format, strlen(format));
|
||||
if (ret < 0)
|
||||
abort();
|
||||
exit(1);
|
||||
}
|
||||
va_end(ap);
|
||||
}
|
||||
#define dprintf_level(level, args...) do { \
|
||||
if (level <= DEBUG_LEVEL) \
|
||||
sigsafe_printf(args); \
|
||||
fflush(NULL); \
|
||||
} while (0)
|
||||
#define dprintf0(args...) dprintf_level(0, args)
|
||||
#define dprintf1(args...) dprintf_level(1, args)
|
||||
|
|
|
@ -72,10 +72,9 @@ extern void abort_hooks(void);
|
|||
test_nr, iteration_nr); \
|
||||
dprintf0("errno at assert: %d", errno); \
|
||||
abort_hooks(); \
|
||||
assert(condition); \
|
||||
exit(__LINE__); \
|
||||
} \
|
||||
} while (0)
|
||||
#define raw_assert(cond) assert(cond)
|
||||
|
||||
void cat_into_file(char *str, char *file)
|
||||
{
|
||||
|
@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file)
|
|||
* these need to be raw because they are called under
|
||||
* pkey_assert()
|
||||
*/
|
||||
raw_assert(fd >= 0);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "error opening '%s'\n", str);
|
||||
perror("error: ");
|
||||
exit(__LINE__);
|
||||
}
|
||||
|
||||
ret = write(fd, str, strlen(str));
|
||||
if (ret != strlen(str)) {
|
||||
perror("write to file failed");
|
||||
fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
|
||||
raw_assert(0);
|
||||
exit(__LINE__);
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
|
@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me)
|
|||
#ifdef __i386__
|
||||
|
||||
#ifndef SYS_mprotect_key
|
||||
# define SYS_mprotect_key 380
|
||||
# define SYS_mprotect_key 380
|
||||
#endif
|
||||
|
||||
#ifndef SYS_pkey_alloc
|
||||
# define SYS_pkey_alloc 381
|
||||
# define SYS_pkey_free 382
|
||||
# define SYS_pkey_alloc 381
|
||||
# define SYS_pkey_free 382
|
||||
#endif
|
||||
#define REG_IP_IDX REG_EIP
|
||||
#define si_pkey_offset 0x14
|
||||
|
||||
#define REG_IP_IDX REG_EIP
|
||||
#define si_pkey_offset 0x14
|
||||
|
||||
#else
|
||||
|
||||
#ifndef SYS_mprotect_key
|
||||
# define SYS_mprotect_key 329
|
||||
# define SYS_mprotect_key 329
|
||||
#endif
|
||||
|
||||
#ifndef SYS_pkey_alloc
|
||||
# define SYS_pkey_alloc 330
|
||||
# define SYS_pkey_free 331
|
||||
# define SYS_pkey_alloc 330
|
||||
# define SYS_pkey_free 331
|
||||
#endif
|
||||
#define REG_IP_IDX REG_RIP
|
||||
#define si_pkey_offset 0x20
|
||||
|
||||
#define REG_IP_IDX REG_RIP
|
||||
#define si_pkey_offset 0x20
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes)
|
|||
}
|
||||
}
|
||||
|
||||
#define SEGV_BNDERR 3 /* failed address bound checks */
|
||||
#define SEGV_PKUERR 4
|
||||
/* Failed address bound checks: */
|
||||
#ifndef SEGV_BNDERR
|
||||
# define SEGV_BNDERR 3
|
||||
#endif
|
||||
|
||||
#ifndef SEGV_PKUERR
|
||||
# define SEGV_PKUERR 4
|
||||
#endif
|
||||
|
||||
static char *si_code_str(int si_code)
|
||||
{
|
||||
|
@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
|
|||
dump_mem(pkru_ptr - 128, 256);
|
||||
pkey_assert(*pkru_ptr);
|
||||
|
||||
si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
|
||||
dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
|
||||
dump_mem(si_pkey_ptr - 8, 24);
|
||||
siginfo_pkey = *si_pkey_ptr;
|
||||
pkey_assert(siginfo_pkey < NR_PKEYS);
|
||||
last_si_pkey = siginfo_pkey;
|
||||
|
||||
if ((si->si_code == SEGV_MAPERR) ||
|
||||
(si->si_code == SEGV_ACCERR) ||
|
||||
(si->si_code == SEGV_BNDERR)) {
|
||||
|
@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
|
|||
exit(4);
|
||||
}
|
||||
|
||||
si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
|
||||
dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
|
||||
dump_mem((u8 *)si_pkey_ptr - 8, 24);
|
||||
siginfo_pkey = *si_pkey_ptr;
|
||||
pkey_assert(siginfo_pkey < NR_PKEYS);
|
||||
last_si_pkey = siginfo_pkey;
|
||||
|
||||
dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
|
||||
/* need __rdpkru() version so we do not do shadow_pkru checking */
|
||||
dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
|
||||
|
@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
|
|||
dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
|
||||
pkru_faults++;
|
||||
dprintf1("<<<<==================================================\n");
|
||||
return;
|
||||
if (trapno == 14) {
|
||||
fprintf(stderr,
|
||||
"ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
|
||||
trapno, ip);
|
||||
fprintf(stderr, "si_addr %p\n", si->si_addr);
|
||||
fprintf(stderr, "REG_ERR: %lx\n",
|
||||
(unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
|
||||
exit(1);
|
||||
} else {
|
||||
fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip);
|
||||
fprintf(stderr, "si_addr %p\n", si->si_addr);
|
||||
fprintf(stderr, "REG_ERR: %lx\n",
|
||||
(unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
|
||||
exit(2);
|
||||
}
|
||||
dprint_in_signal = 0;
|
||||
}
|
||||
|
||||
|
@ -393,10 +391,15 @@ pid_t fork_lazy_child(void)
|
|||
return forkret;
|
||||
}
|
||||
|
||||
#define PKEY_DISABLE_ACCESS 0x1
|
||||
#define PKEY_DISABLE_WRITE 0x2
|
||||
#ifndef PKEY_DISABLE_ACCESS
|
||||
# define PKEY_DISABLE_ACCESS 0x1
|
||||
#endif
|
||||
|
||||
u32 pkey_get(int pkey, unsigned long flags)
|
||||
#ifndef PKEY_DISABLE_WRITE
|
||||
# define PKEY_DISABLE_WRITE 0x2
|
||||
#endif
|
||||
|
||||
static u32 hw_pkey_get(int pkey, unsigned long flags)
|
||||
{
|
||||
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
|
||||
u32 pkru = __rdpkru();
|
||||
|
@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags)
|
|||
return masked_pkru;
|
||||
}
|
||||
|
||||
int pkey_set(int pkey, unsigned long rights, unsigned long flags)
|
||||
static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
|
||||
{
|
||||
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
|
||||
u32 old_pkru = __rdpkru();
|
||||
|
@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags)
|
|||
pkey, flags);
|
||||
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
|
||||
|
||||
pkey_rights = pkey_get(pkey, syscall_flags);
|
||||
pkey_rights = hw_pkey_get(pkey, syscall_flags);
|
||||
|
||||
dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
|
||||
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
|
||||
pkey, pkey, pkey_rights);
|
||||
pkey_assert(pkey_rights >= 0);
|
||||
|
||||
pkey_rights |= flags;
|
||||
|
||||
ret = pkey_set(pkey, pkey_rights, syscall_flags);
|
||||
ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
|
||||
assert(!ret);
|
||||
/*pkru and flags have the same format */
|
||||
shadow_pkru |= flags << (pkey * 2);
|
||||
|
@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags)
|
|||
|
||||
pkey_assert(ret >= 0);
|
||||
|
||||
pkey_rights = pkey_get(pkey, syscall_flags);
|
||||
dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
|
||||
pkey_rights = hw_pkey_get(pkey, syscall_flags);
|
||||
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
|
||||
pkey, pkey, pkey_rights);
|
||||
|
||||
dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
|
||||
|
@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags)
|
|||
{
|
||||
unsigned long syscall_flags = 0;
|
||||
int ret;
|
||||
int pkey_rights = pkey_get(pkey, syscall_flags);
|
||||
int pkey_rights = hw_pkey_get(pkey, syscall_flags);
|
||||
u32 orig_pkru = rdpkru();
|
||||
|
||||
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
|
||||
|
||||
dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
|
||||
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
|
||||
pkey, pkey, pkey_rights);
|
||||
pkey_assert(pkey_rights >= 0);
|
||||
|
||||
pkey_rights |= flags;
|
||||
|
||||
ret = pkey_set(pkey, pkey_rights, 0);
|
||||
ret = hw_pkey_set(pkey, pkey_rights, 0);
|
||||
/* pkru and flags have the same format */
|
||||
shadow_pkru &= ~(flags << (pkey * 2));
|
||||
pkey_assert(ret >= 0);
|
||||
|
||||
pkey_rights = pkey_get(pkey, syscall_flags);
|
||||
dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
|
||||
pkey_rights = hw_pkey_get(pkey, syscall_flags);
|
||||
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
|
||||
pkey, pkey, pkey_rights);
|
||||
|
||||
dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
|
||||
|
@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
|
|||
struct pkey_malloc_record {
|
||||
void *ptr;
|
||||
long size;
|
||||
int prot;
|
||||
};
|
||||
struct pkey_malloc_record *pkey_malloc_records;
|
||||
struct pkey_malloc_record *pkey_last_malloc_record;
|
||||
long nr_pkey_malloc_records;
|
||||
void record_pkey_malloc(void *ptr, long size)
|
||||
void record_pkey_malloc(void *ptr, long size, int prot)
|
||||
{
|
||||
long i;
|
||||
struct pkey_malloc_record *rec = NULL;
|
||||
|
@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size)
|
|||
(int)(rec - pkey_malloc_records), rec, ptr, size);
|
||||
rec->ptr = ptr;
|
||||
rec->size = size;
|
||||
rec->prot = prot;
|
||||
pkey_last_malloc_record = rec;
|
||||
nr_pkey_malloc_records++;
|
||||
}
|
||||
|
||||
|
@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
|
|||
pkey_assert(ptr != (void *)-1);
|
||||
ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
|
||||
pkey_assert(!ret);
|
||||
record_pkey_malloc(ptr, size);
|
||||
record_pkey_malloc(ptr, size, prot);
|
||||
rdpkru();
|
||||
|
||||
dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
|
||||
|
@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
|
|||
size = ALIGN_UP(size, HPAGE_SIZE * 2);
|
||||
ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
|
||||
pkey_assert(ptr != (void *)-1);
|
||||
record_pkey_malloc(ptr, size);
|
||||
record_pkey_malloc(ptr, size, prot);
|
||||
mprotect_pkey(ptr, size, prot, pkey);
|
||||
|
||||
dprintf1("unaligned ptr: %p\n", ptr);
|
||||
|
@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
|
|||
pkey_assert(ptr != (void *)-1);
|
||||
mprotect_pkey(ptr, size, prot, pkey);
|
||||
|
||||
record_pkey_malloc(ptr, size);
|
||||
record_pkey_malloc(ptr, size, prot);
|
||||
|
||||
dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
|
||||
return ptr;
|
||||
|
@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
|
|||
|
||||
mprotect_pkey(ptr, size, prot, pkey);
|
||||
|
||||
record_pkey_malloc(ptr, size);
|
||||
record_pkey_malloc(ptr, size, prot);
|
||||
|
||||
dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
|
||||
close(fd);
|
||||
|
@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey)
|
|||
}
|
||||
|
||||
int last_pkru_faults;
|
||||
#define UNKNOWN_PKEY -2
|
||||
void expected_pk_fault(int pkey)
|
||||
{
|
||||
dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
|
||||
__func__, last_pkru_faults, pkru_faults);
|
||||
dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
|
||||
pkey_assert(last_pkru_faults + 1 == pkru_faults);
|
||||
pkey_assert(last_si_pkey == pkey);
|
||||
|
||||
/*
|
||||
* For exec-only memory, we do not know the pkey in
|
||||
* advance, so skip this check.
|
||||
*/
|
||||
if (pkey != UNKNOWN_PKEY)
|
||||
pkey_assert(last_si_pkey == pkey);
|
||||
|
||||
/*
|
||||
* The signal handler should have cleared out PKRU to let the
|
||||
* test program continue. We now have to restore it.
|
||||
|
@ -939,10 +954,11 @@ void expected_pk_fault(int pkey)
|
|||
last_si_pkey = -1;
|
||||
}
|
||||
|
||||
void do_not_expect_pk_fault(void)
|
||||
{
|
||||
pkey_assert(last_pkru_faults == pkru_faults);
|
||||
}
|
||||
#define do_not_expect_pk_fault(msg) do { \
|
||||
if (last_pkru_faults != pkru_faults) \
|
||||
dprintf0("unexpected PK fault: %s\n", msg); \
|
||||
pkey_assert(last_pkru_faults == pkru_faults); \
|
||||
} while (0)
|
||||
|
||||
int test_fds[10] = { -1 };
|
||||
int nr_test_fds;
|
||||
|
@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
|
|||
pkey_assert(i < NR_PKEYS*2);
|
||||
|
||||
/*
|
||||
* There are 16 pkeys supported in hardware. One is taken
|
||||
* up for the default (0) and another can be taken up by
|
||||
* an execute-only mapping. Ensure that we can allocate
|
||||
* at least 14 (16-2).
|
||||
* There are 16 pkeys supported in hardware. Three are
|
||||
* allocated by the time we get here:
|
||||
* 1. The default key (0)
|
||||
* 2. One possibly consumed by an execute-only mapping.
|
||||
* 3. One allocated by the test code and passed in via
|
||||
* 'pkey' to this function.
|
||||
* Ensure that we can allocate at least another 13 (16-3).
|
||||
*/
|
||||
pkey_assert(i >= NR_PKEYS-2);
|
||||
pkey_assert(i >= NR_PKEYS-3);
|
||||
|
||||
for (i = 0; i < nr_allocated_pkeys; i++) {
|
||||
err = sys_pkey_free(allocated_pkeys[i]);
|
||||
|
@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* pkey 0 is special. It is allocated by default, so you do not
|
||||
* have to call pkey_alloc() to use it first. Make sure that it
|
||||
* is usable.
|
||||
*/
|
||||
void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
|
||||
{
|
||||
long size;
|
||||
int prot;
|
||||
|
||||
assert(pkey_last_malloc_record);
|
||||
size = pkey_last_malloc_record->size;
|
||||
/*
|
||||
* This is a bit of a hack. But mprotect() requires
|
||||
* huge-page-aligned sizes when operating on hugetlbfs.
|
||||
* So, make sure that we use something that's a multiple
|
||||
* of a huge page when we can.
|
||||
*/
|
||||
if (size >= HPAGE_SIZE)
|
||||
size = HPAGE_SIZE;
|
||||
prot = pkey_last_malloc_record->prot;
|
||||
|
||||
/* Use pkey 0 */
|
||||
mprotect_pkey(ptr, size, prot, 0);
|
||||
|
||||
/* Make sure that we can set it back to the original pkey. */
|
||||
mprotect_pkey(ptr, size, prot, pkey);
|
||||
}
|
||||
|
||||
void test_ptrace_of_child(int *ptr, u16 pkey)
|
||||
{
|
||||
__attribute__((__unused__)) int peek_result;
|
||||
|
@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
|
|||
pkey_assert(ret != -1);
|
||||
/* Now access from the current task, and expect NO exception: */
|
||||
peek_result = read_ptr(plain_ptr);
|
||||
do_not_expect_pk_fault();
|
||||
do_not_expect_pk_fault("read plain pointer after ptrace");
|
||||
|
||||
ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
|
||||
pkey_assert(ret != -1);
|
||||
|
@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
|
|||
free(plain_ptr_unaligned);
|
||||
}
|
||||
|
||||
void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
|
||||
void *get_pointer_to_instructions(void)
|
||||
{
|
||||
void *p1;
|
||||
int scratch;
|
||||
int ptr_contents;
|
||||
int ret;
|
||||
|
||||
p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
|
||||
dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
|
||||
|
@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
|
|||
/* Point 'p1' at the *second* page of the function: */
|
||||
p1 += PAGE_SIZE;
|
||||
|
||||
/*
|
||||
* Try to ensure we fault this in on next touch to ensure
|
||||
* we get an instruction fault as opposed to a data one
|
||||
*/
|
||||
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
|
||||
|
||||
return p1;
|
||||
}
|
||||
|
||||
void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
|
||||
{
|
||||
void *p1;
|
||||
int scratch;
|
||||
int ptr_contents;
|
||||
int ret;
|
||||
|
||||
p1 = get_pointer_to_instructions();
|
||||
lots_o_noops_around_write(&scratch);
|
||||
ptr_contents = read_ptr(p1);
|
||||
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
|
||||
|
@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
|
|||
*/
|
||||
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
|
||||
lots_o_noops_around_write(&scratch);
|
||||
do_not_expect_pk_fault();
|
||||
do_not_expect_pk_fault("executing on PROT_EXEC memory");
|
||||
ptr_contents = read_ptr(p1);
|
||||
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
|
||||
expected_pk_fault(pkey);
|
||||
}
|
||||
|
||||
void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
|
||||
{
|
||||
void *p1;
|
||||
int scratch;
|
||||
int ptr_contents;
|
||||
int ret;
|
||||
|
||||
dprintf1("%s() start\n", __func__);
|
||||
|
||||
p1 = get_pointer_to_instructions();
|
||||
lots_o_noops_around_write(&scratch);
|
||||
ptr_contents = read_ptr(p1);
|
||||
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
|
||||
|
||||
/* Use a *normal* mprotect(), not mprotect_pkey(): */
|
||||
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
|
||||
pkey_assert(!ret);
|
||||
|
||||
dprintf2("pkru: %x\n", rdpkru());
|
||||
|
||||
/* Make sure this is an *instruction* fault */
|
||||
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
|
||||
lots_o_noops_around_write(&scratch);
|
||||
do_not_expect_pk_fault("executing on PROT_EXEC memory");
|
||||
ptr_contents = read_ptr(p1);
|
||||
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
|
||||
expected_pk_fault(UNKNOWN_PKEY);
|
||||
|
||||
/*
|
||||
* Put the memory back to non-PROT_EXEC. Should clear the
|
||||
* exec-only pkey off the VMA and allow it to be readable
|
||||
* again. Go to PROT_NONE first to check for a kernel bug
|
||||
* that did not clear the pkey when doing PROT_NONE.
|
||||
*/
|
||||
ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
|
||||
pkey_assert(!ret);
|
||||
|
||||
ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
|
||||
pkey_assert(!ret);
|
||||
ptr_contents = read_ptr(p1);
|
||||
do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
|
||||
}
|
||||
|
||||
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
|
||||
{
|
||||
int size = PAGE_SIZE;
|
||||
|
@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
|
|||
test_kernel_gup_of_access_disabled_region,
|
||||
test_kernel_gup_write_to_write_disabled_region,
|
||||
test_executing_on_unreadable_memory,
|
||||
test_implicit_mprotect_exec_only_memory,
|
||||
test_mprotect_with_pkey_0,
|
||||
test_ptrace_of_child,
|
||||
test_pkey_syscalls_on_non_allocated_pkey,
|
||||
test_pkey_syscalls_bad_args,
|
||||
|
|
Loading…
Reference in New Issue