From 6ab80d88f82e84e331e79ca4b7e2ca2fe63c8c2f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:47 -0500 Subject: [PATCH 01/23] exit/doublefault: Remove apparently bogus comment about rewind_stack_do_exit I do not see panic calling rewind_stack_do_exit anywhere, nor can I find anywhere in the history where doublefault_shim has called rewind_stack_do_exit. So I don't think this comment was ever actually correct. Cc: Andy Lutomirski Fixes: 7d8d8cfdee9a ("x86/doublefault/32: Rewrite the x86_32 #DF handler and unify with 64-bit") Link: https://lkml.kernel.org/r/20211020174406.17889-1-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/x86/kernel/doublefault_32.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index d1d49e3d536b..3b58d8703094 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -77,9 +77,6 @@ asmlinkage noinstr void __noreturn doublefault_shim(void) * some way to reconstruct CR3. We could make a credible guess based * on cpu_tlbstate, but that would be racy and would not account for * PTI. - * - * Instead, don't bother. We can return through - * rewind_stack_do_exit() instead. */ panic("cannot return from double fault\n"); } From 9fd5a04d8efcbf511286dd36c46fd70a645b167d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:48 -0500 Subject: [PATCH 02/23] exit: Remove calls of do_exit after noreturn versions of die On nds32, openrisc, s390, sh, and xtensa the function die never returns. Mark die __noreturn so that no one expects die to return. Remove the do_exit calls after die as they will never be reached. 
Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: openrisc@lists.librecores.org Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: linux-s390@vger.kernel.org Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: Chris Zankel Cc: Max Filippov Fixes: 2.3.16 Fixes: 2.3.99-pre8 Fixes: 3f65ce4d141e ("[PATCH] xtensa: Architecture support for Tensilica Xtensa Part 5") Fixes: 664eec400bf8 ("nds32: MMU fault handling and page table management") Fixes: 61e85e367535 ("OpenRISC: Memory management") Link: https://lkml.kernel.org/r/20211020174406.17889-2-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/nds32/kernel/traps.c | 2 +- arch/nds32/mm/fault.c | 6 +----- arch/openrisc/kernel/traps.c | 2 +- arch/openrisc/mm/fault.c | 4 +--- arch/s390/include/asm/kdebug.h | 2 +- arch/s390/kernel/dumpstack.c | 2 +- arch/s390/mm/fault.c | 2 -- arch/sh/kernel/traps.c | 2 +- arch/sh/mm/fault.c | 2 -- arch/xtensa/kernel/traps.c | 2 +- arch/xtensa/mm/fault.c | 3 +-- 11 files changed, 9 insertions(+), 20 deletions(-) diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index f06421c645af..ca75d475eda4 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -118,7 +118,7 @@ DEFINE_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. 
*/ -void die(const char *str, struct pt_regs *regs, int err) +void __noreturn die(const char *str, struct pt_regs *regs, int err) { struct task_struct *tsk = current; static int die_counter; diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index f02524eb6d56..1d139b117168 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -13,7 +13,7 @@ #include -extern void die(const char *str, struct pt_regs *regs, long err); +extern void __noreturn die(const char *str, struct pt_regs *regs, long err); /* * This is useful to dump out the page tables associated with @@ -299,10 +299,6 @@ void do_page_fault(unsigned long entry, unsigned long addr, show_pte(mm, addr); die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); - - return; /* * We ran out of memory, or some other thing happened to us that made diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index aa1e709405ac..0898cb159fac 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -197,7 +197,7 @@ void nommu_dump_state(struct pt_regs *regs, } /* This is normally the 'Oops' routine */ -void die(const char *str, struct pt_regs *regs, long err) +void __noreturn die(const char *str, struct pt_regs *regs, long err) { console_verbose(); diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index c730d1a51686..f0fa6394a58e 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -32,7 +32,7 @@ unsigned long pte_errors; /* updated by do_page_fault() */ */ volatile pgd_t *current_pgd[NR_CPUS]; -extern void die(char *, struct pt_regs *, long); +extern void __noreturn die(char *, struct pt_regs *, long); /* * This routine handles page faults. 
It determines the address, @@ -248,8 +248,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, die("Oops", regs, write_acc); - do_exit(SIGKILL); - /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h index d5327f064799..4377238e4752 100644 --- a/arch/s390/include/asm/kdebug.h +++ b/arch/s390/include/asm/kdebug.h @@ -23,6 +23,6 @@ enum die_val { DIE_NMI_IPI, }; -extern void die(struct pt_regs *, const char *); +extern void __noreturn die(struct pt_regs *, const char *); #endif diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index db1bc00229ca..f45e66b8bed6 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -192,7 +192,7 @@ void show_regs(struct pt_regs *regs) static DEFINE_SPINLOCK(die_lock); -void die(struct pt_regs *regs, const char *str) +void __noreturn die(struct pt_regs *regs, const char *str) { static int die_counter; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 212632d57db9..d30f5986fa85 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -260,7 +260,6 @@ static noinline void do_no_context(struct pt_regs *regs) " in virtual user address space\n"); dump_fault_info(regs); die(regs, "Oops"); - do_exit(SIGKILL); } static noinline void do_low_address(struct pt_regs *regs) @@ -270,7 +269,6 @@ static noinline void do_low_address(struct pt_regs *regs) if (regs->psw.mask & PSW_MASK_PSTATE) { /* Low-address protection hit in user mode 'cannot happen'. 
*/ die (regs, "Low-address protection"); - do_exit(SIGKILL); } do_no_context(regs); diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index e76b22157099..cbe3201d4f21 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -20,7 +20,7 @@ static DEFINE_SPINLOCK(die_lock); -void die(const char *str, struct pt_regs *regs, long err) +void __noreturn die(const char *str, struct pt_regs *regs, long err) { static int die_counter; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 88a1f453d73e..1e1aa75df3ca 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -238,8 +238,6 @@ no_context(struct pt_regs *regs, unsigned long error_code, show_fault_oops(regs, address); die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); } static void diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 874b6efc6fb3..fb056a191339 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -527,7 +527,7 @@ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) DEFINE_SPINLOCK(die_lock); -void die(const char * str, struct pt_regs * regs, long err) +void __noreturn die(const char * str, struct pt_regs * regs, long err) { static int die_counter; const char *pr = ""; diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 95a74890c7e9..fd6a70635962 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -238,7 +238,7 @@ void do_page_fault(struct pt_regs *regs) void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { - extern void die(const char*, struct pt_regs*, long); + extern void __noreturn die(const char*, struct pt_regs*, long); const struct exception_table_entry *entry; /* Are we prepared to handle this kernel fault? 
*/ @@ -257,5 +257,4 @@ bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) "address %08lx\n pc = %08lx, ra = %08lx\n", address, regs->pc, regs->areg[0]); die("Oops", regs, sig); - do_exit(sig); } From a52f60fa2905b4abb26235d0a11cff13ced92709 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:49 -0500 Subject: [PATCH 03/23] reboot: Remove the unreachable panic after do_exit in reboot(2) Link: https://lkml.kernel.org/r/20211020174406.17889-3-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/reboot.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/reboot.c b/kernel/reboot.c index f7440c0c7e43..d6e0f9fb7f04 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -359,7 +359,6 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, case LINUX_REBOOT_CMD_HALT: kernel_halt(); do_exit(0); - panic("cannot halt"); case LINUX_REBOOT_CMD_POWER_OFF: kernel_power_off(); From 97cae848270731e4224681368f2061c94a9fc588 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:50 -0500 Subject: [PATCH 04/23] signal/sparc32: Remove unreachable do_exit in do_sparc_fault The call to do_exit in do_sparc_fault immediately follows a call to unhandled_fault. The function unhandled_fault never returns. This means the call to do_exit can never be reached. Cc: David Miller Cc: sparclinux@vger.kernel.org Fixes: 2.3.41 Link: https://lkml.kernel.org/r/20211020174406.17889-4-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- arch/sparc/mm/fault_32.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index fa858626b85b..90dc4ae315c8 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -248,7 +248,6 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, } unhandled_fault(address, tsk, regs); - do_exit(SIGKILL); /* * We ran out of memory, or some other thing happened to us that made From 95bf9d646c3c3f95cb0be7e703b371db8da5be68 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:51 -0500 Subject: [PATCH 05/23] signal/mips: Update (_save|_restore)_fp_context to fail with -EFAULT When an instruction to save or restore a register from the stack fails in _save_fp_context or _restore_fp_context return with -EFAULT. This change was made to r2300_fpu.S[1] but it looks like it got lost with the introduction of EX2[2]. This is also what the other implementation of _save_fp_context and _restore_fp_context in r4k_fpu.S does, and what is needed for the callers to be able to handle the error. Furthermore calling do_exit(SIGSEGV) from bad_stack is wrong because it does not terminate the entire process it just terminates a single thread. As the changed code was the only caller of arch/mips/kernel/syscall.c:bad_stack remove the problematic and now unused helper function. Cc: Thomas Bogendoerfer Cc: Maciej Rozycki Cc: linux-mips@vger.kernel.org [1] 35938a00ba86 ("MIPS: Fix ISA I FP sigcontext access violation handling") [2] f92722dc4545 ("MIPS: Correct MIPS I FP sigcontext layout") Cc: stable@vger.kernel.org Fixes: f92722dc4545 ("MIPS: Correct MIPS I FP sigcontext layout") Acked-by: Maciej W. Rozycki Acked-by: Thomas Bogendoerfer Link: https://lkml.kernel.org/r/20211020174406.17889-5-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- arch/mips/kernel/r2300_fpu.S | 4 ++-- arch/mips/kernel/syscall.c | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index 12e58053544f..cbf6db98cfb3 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -29,8 +29,8 @@ #define EX2(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,bad_stack; \ - PTR 9b+4,bad_stack; \ + PTR 9b,fault; \ + PTR 9b+4,fault; \ .previous .set mips1 diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 2afa3eef486a..5512cd586e6e 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -240,12 +240,3 @@ SYSCALL_DEFINE3(cachectl, char *, addr, int, nbytes, int, op) { return -ENOSYS; } - -/* - * If we ever come here the user sp is bad. Zap the process right away. - * Due to the bad stack signaling wouldn't work. - */ -asmlinkage void bad_stack(void) -{ - do_exit(SIGSEGV); -} From ce0ee4e6ac99606f3945f4d47775544edc3f7985 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:52 -0500 Subject: [PATCH 06/23] signal/sh: Use force_sig(SIGKILL) instead of do_group_exit(SIGKILL) Today the sh code allocates memory the first time a process uses the fpu. If that memory allocation fails, kill the affected task with force_sig(SIGKILL) rather than do_group_exit(SIGKILL). Calling do_group_exit from an exception handler can potentially lead to dead locks as do_group_exit is not designed to be called from interrupt context. Instead use force_sig(SIGKILL) to kill the userspace process. Sending signals in general and force_sig in particular has been tested from interrupt context so there should be no problems. Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Fixes: 0ea820cf9bf5 ("sh: Move over to dynamically allocated FPU context.") Link: https://lkml.kernel.org/r/20211020174406.17889-6-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- arch/sh/kernel/cpu/fpu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index ae354a2931e7..fd6db0ab1928 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -62,18 +62,20 @@ void fpu_state_restore(struct pt_regs *regs) } if (!tsk_used_math(tsk)) { - local_irq_enable(); + int ret; /* * does a slab alloc which can sleep */ - if (init_fpu(tsk)) { + local_irq_enable(); + ret = init_fpu(tsk); + local_irq_disable(); + if (ret) { /* * ran out of memory! */ - do_group_exit(SIGKILL); + force_sig(SIGKILL); return; } - local_irq_disable(); } grab_fpu(regs); From 83a1f27ad773b1d8f0460d3a676114c7651918cc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:53 -0500 Subject: [PATCH 07/23] signal/powerpc: On swapcontext failure force SIGSEGV If the register state may be partial and corrupted instead of calling do_exit, call force_sigsegv(SIGSEGV). Which properly kills the process with SIGSEGV and does not let any more userspace code execute, instead of just killing one thread of the process and potentially confusing everything. Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org History-tree: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Fixes: 756f1ae8a44e ("PPC32: Rework signal code and add a swapcontext system call.") Fixes: 04879b04bf50 ("[PATCH] ppc64: VMX (Altivec) support & signal32 rework, from Ben Herrenschmidt") Link: https://lkml.kernel.org/r/20211020174406.17889-7-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- arch/powerpc/kernel/signal_32.c | 6 ++++-- arch/powerpc/kernel/signal_64.c | 9 ++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 0608581967f0..666f3da41232 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1062,8 +1062,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * or if another thread unmaps the region containing the context. * We kill the task with a SIGSEGV in this situation. */ - if (do_setcontext(new_ctx, regs, 0)) - do_exit(SIGSEGV); + if (do_setcontext(new_ctx, regs, 0)) { + force_sigsegv(SIGSEGV); + return -EFAULT; + } set_thread_flag(TIF_RESTOREALL); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 1831bba0582e..d8de622c9e4a 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -703,15 +703,18 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * We kill the task with a SIGSEGV in this situation. */ - if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) - do_exit(SIGSEGV); + if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) { + force_sigsegv(SIGSEGV); + return -EFAULT; + } set_current_blocked(&set); if (!user_read_access_begin(new_ctx, ctx_size)) return -EFAULT; if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) { user_read_access_end(); - do_exit(SIGSEGV); + force_sigsegv(SIGSEGV); + return -EFAULT; } user_read_access_end(); From 984bd71fb32032ef395a895916853964166b322b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:54 -0500 Subject: [PATCH 08/23] signal/sparc: In setup_tsb_params convert open coded BUG into BUG The function setup_tsb_params has exactly one caller tsb_grow. The function tsb_grow passes in a tsb_bytes value that is between 8192 and 1048576 inclusive, and is guaranteed to be a power of 2. 
The function setup_tsb_params verifies this property with a switch statement and then prints an error and causes the task to exit if this is not true. In practice that print statement can never be reached because tsb_grow never passes in a bad tsb_size. So if tsb_size ever gets a bad value that is a kernel bug. So replace the do_exit which is effectively an open coded version of BUG() with an actual call to BUG(). Making it clearer that this is a case that can never, and should never happen. Cc: David Miller Cc: sparclinux@vger.kernel.org Link: https://lkml.kernel.org/r/20211020174406.17889-8-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/sparc/mm/tsb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 0dce4b7ff73e..912205787161 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -266,7 +266,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign default: printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n", current->comm, current->pid, tsb_bytes); - do_exit(SIGSEGV); + BUG(); } tte |= pte_sz_bits(page_sz); From 1a4d21a23c4ca7467726be7db9ae8077a62b2c62 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:55 -0500 Subject: [PATCH 09/23] signal/vm86_32: Replace open coded BUG_ON with an actual BUG_ON The function save_v86_state is only called when userspace was operating in vm86 mode before entering the kernel. Not having vm86 state in the task_struct should never happen. So transform the hand rolled BUG_ON into an actual BUG_ON to make it clear what is happening. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: x86@kernel.org Cc: H Peter Anvin Link: https://lkml.kernel.org/r/20211020174406.17889-9-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- arch/x86/kernel/vm86_32.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index e5a7a10a0164..63486da77272 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -106,10 +106,8 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) */ local_irq_enable(); - if (!vm86 || !vm86->user_vm86) { - pr_alert("no user_vm86: BAD\n"); - do_exit(SIGSEGV); - } + BUG_ON(!vm86 || !vm86->user_vm86); + set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask); user = vm86->user_vm86; From 1fbd60df8a852d9c55de8cd3621899cf4c72a5b7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:56 -0500 Subject: [PATCH 10/23] signal/vm86_32: Properly send SIGSEGV when the vm86 state cannot be saved. Update save_v86_state to always complete all of its work, except possibly some of the copies to userspace, even if save_v86_state takes a fault. This ensures that the kernel is always in a sane state, even if userspace has done something silly. When save_v86_state takes a fault update it to force userspace to take a SIGSEGV and terminate the userspace application. As Andy pointed out in review of the first version of this change there are races between sigaction and the application terminating. Now that the code has been modified to always perform all save_v86_state's work (except possibly copying to userspace) those races do not matter from a kernel perspective. Forcing the userspace application to terminate (by resetting its handler to SIG_DFL) is there to keep everything as close to the current behavior as possible while removing the unique (and difficult to maintain) use of do_exit. If this new SIGSEGV happens during handle_signal the next time around the exit_to_user_mode_loop, SIGSEGV will be delivered to userspace. 
All of the callers of handle_vm86_trap and handle_vm86_fault run the exit_to_user_mode_loop before they return to userspace, so any signal sent to the current task during their execution will be delivered to the current task before that task exits to usermode. Cc: Andy Lutomirski Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: x86@kernel.org Cc: H Peter Anvin v1: https://lkml.kernel.org/r/20211020174406.17889-10-ebiederm@xmission.com Link: https://lkml.kernel.org/r/877de1xcr6.fsf_-_@disp2133 Signed-off-by: Eric W. Biederman --- arch/x86/kernel/vm86_32.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 63486da77272..933cafab7832 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -140,6 +140,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) user_access_end(); +exit_vm86: preempt_disable(); tsk->thread.sp0 = vm86->saved_sp0; tsk->thread.sysenter_cs = __KERNEL_CS; @@ -159,7 +160,8 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) user_access_end(); Efault: pr_alert("could not access userspace vm86 info\n"); - do_exit(SIGSEGV); + force_sigsegv(SIGSEGV); + goto exit_vm86; } static int do_vm86_irq_handling(int subfunction, int irqnumber); From 9bc508cf0791c8e5a37696de1a046d746fcbd9d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:57 -0500 Subject: [PATCH 11/23] signal/s390: Use force_sigsegv in default_trap_handler Reading the history it is unclear why default_trap_handler calls do_exit. It is not even mentioned in the commit where the change happened. My best guess is that because it is unknown why the exception happened it was desired to guarantee the process never returned to userspace. Using do_exit(SIGSEGV) has the problem that it will only terminate one thread of a process, leaving the process in an undefined state. 
Use force_sigsegv(SIGSEGV) instead which effectively has the same behavior except that it uses the ordinary signal mechanism and terminates all threads of a process and is generally well defined. Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: linux-s390@vger.kernel.org Fixes: ca2ab03237ec ("[PATCH] s390: core changes") History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Reviewed-by: Christian Borntraeger Link: https://lkml.kernel.org/r/20211020174406.17889-11-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/s390/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index bcefc2173de4..51729ea2cf8e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -84,7 +84,7 @@ static void default_trap_handler(struct pt_regs *regs) { if (user_mode(regs)) { report_user_fault(regs, SIGSEGV, 0); - do_exit(SIGSEGV); + force_sigsegv(SIGSEGV); } else die(regs, "Unknown program exception"); } From 111e70490d2a673730b89c010b61cea2d982d121 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:58 -0500 Subject: [PATCH 12/23] exit/kthread: Have kernel threads return instead of calling do_exit In 2009 Oleg reworked[1] the kernel threads so that it is not necessary to call do_exit if you are not using kthread_stop(). Remove the explicit calls of do_exit and complete_and_exit (with a NULL completion) that were previously necessary. [1] 63706172f332 ("kthreads: rework kthread_stop()") Link: https://lkml.kernel.org/r/20211020174406.17889-12-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- drivers/firmware/stratix10-svc.c | 4 ++-- drivers/soc/ti/wkup_m3_ipc.c | 2 +- fs/ocfs2/journal.c | 5 +---- kernel/kthread.c | 2 +- net/batman-adv/tp_meter.c | 2 +- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 2a7687911c09..29c0a616b317 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -520,7 +520,7 @@ static int svc_normal_to_secure_thread(void *data) * physical address of memory block reserved by secure monitor software at * secure world. * - * svc_normal_to_secure_shm_thread() calls do_exit() directly since it is a + * svc_normal_to_secure_shm_thread() terminates directly since it is a * standlone thread for which no one will call kthread_stop() or return when * 'kthread_should_stop()' is true. */ @@ -544,7 +544,7 @@ static int svc_normal_to_secure_shm_thread(void *data) } complete(&sh_mem->sync_complete); - do_exit(0); + return 0; } /** diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index 09abd17065ba..0733443a2631 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -426,7 +426,7 @@ static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc) else m3_ipc_state = m3_ipc; - do_exit(0); + return 0; } static int wkup_m3_ipc_probe(struct platform_device *pdev) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4f15750aac5d..329986f12db3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1497,10 +1497,7 @@ static int __ocfs2_recovery_thread(void *arg) if (quota_enabled) kfree(rm_quota); - /* no one is callint kthread_stop() for us so the kthread() api - * requires that we call do_exit(). And it isn't exported, but - * complete_and_exit() seems to be a minimal wrapper around it. 
*/ - complete_and_exit(NULL, status); + return status; } void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) diff --git a/kernel/kthread.c b/kernel/kthread.c index 5b37a8567168..33e17beaa682 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -433,7 +433,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), * If thread is going to be bound on a particular cpu, give its node * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE. * When woken, the thread will run @threadfn() with @data as its - * argument. @threadfn() can either call do_exit() directly if it is a + * argument. @threadfn() can either return directly if it is a * standalone thread for which no one will call kthread_stop(), or * return when 'kthread_should_stop()' is true (which means * kthread_stop() has been called). The return value should be zero diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 56b9fe97b3b4..1252540cde17 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -890,7 +890,7 @@ static int batadv_tp_send(void *arg) batadv_tp_vars_put(tp_vars); - do_exit(0); + return 0; } /** From 26d5badbccddcc063dc5174a2baffd13a23322aa Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:59 -0500 Subject: [PATCH 13/23] signal: Implement force_fatal_sig Add a simple helper force_fatal_sig that causes a signal to be delivered to a process as if the signal handler was set to SIG_DFL. Reimplement force_sigsegv based upon this new helper. This fixes force_sigsegv so that when it forces the default signal handler to be used the code now forces the signal to be unblocked as well. Reusing the tested logic in force_sig_info_to_task that was built for force_sig_seccomp this makes the implementation trivial. 
This is interesting both because it makes force_sigsegv simpler and because there are a couple of buggy places in the kernel that call do_exit(SIGILL) or do_exit(SIGSYS) because there is no straight forward way today for those places to simply force the exit of a process with the chosen signal. Creating force_fatal_sig allows those places to be implemented with normal signal exits. Link: https://lkml.kernel.org/r/20211020174406.17889-13-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/sched/signal.h | 1 + kernel/signal.c | 26 +++++++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index e5f4ce622ee6..e2dc9f119ada 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -338,6 +338,7 @@ extern int kill_pid(struct pid *pid, int sig, int priv); extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int); +extern void force_fatal_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); diff --git a/kernel/signal.c b/kernel/signal.c index 952741f6d0f9..6a5e1802b9a2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1662,6 +1662,19 @@ void force_sig(int sig) } EXPORT_SYMBOL(force_sig); +void force_fatal_sig(int sig) +{ + struct kernel_siginfo info; + + clear_siginfo(&info); + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_KERNEL; + info.si_pid = 0; + info.si_uid = 0; + force_sig_info_to_task(&info, current, true); +} + /* * When things go south during signal handling, we * will force a SIGSEGV. 
And if the signal that caused @@ -1670,15 +1683,10 @@ EXPORT_SYMBOL(force_sig); */ void force_sigsegv(int sig) { - struct task_struct *p = current; - - if (sig == SIGSEGV) { - unsigned long flags; - spin_lock_irqsave(&p->sighand->siglock, flags); - p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL; - spin_unlock_irqrestore(&p->sighand->siglock, flags); - } - force_sig(SIGSEGV); + if (sig == SIGSEGV) + force_fatal_sig(SIGSEGV); + else + force_sig(SIGSEGV); } int force_sig_fault_to_task(int sig, int code, void __user *addr From 941edc5bf174b67f94db19817cbeab0a93e0c32a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:00 -0500 Subject: [PATCH 14/23] exit/syscall_user_dispatch: Send ordinary signals on failure Use force_fatal_sig instead of calling do_exit directly. This ensures the ordinary signal handling path gets invoked, core dumps as appropriate get created, and for multi-threaded processes all of the threads are terminated not just a single thread. When asked Gabriel Krisman Bertazi said [1]: > ebiederm@xmission.com (Eric W. Biederman) asked: > > > Why does do_syscal_user_dispatch call do_exit(SIGSEGV) and > > do_exit(SIGSYS) instead of force_sig(SIGSEGV) and force_sig(SIGSYS)? > > > > Looking at the code these cases are not expected to happen, so I would > > be surprised if userspace depends on any particular behaviour on the > > failure path so I think we can change this. > > Hi Eric, > > There is not really a good reason, and the use case that originated the > feature doesn't rely on it. > > Unless I'm missing yet another problem and others correct me, I think > it makes sense to change it as you described. > > > Is using do_exit in this way something you copied from seccomp? > > I'm not sure, its been a while, but I think it might be just that. The > first prototype of SUD was implemented as a seccomp mode. 
If at some point it becomes interesting we could relax "force_fatal_sig(SIGSEGV)" to instead say "force_sig_fault(SIGSEGV, SEGV_MAPERR, sd->selector)". I avoid doing that in this patch to avoid making it possible to catch currently uncatchable signals. Cc: Gabriel Krisman Bertazi Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski [1] https://lkml.kernel.org/r/87mtr6gdvi.fsf@collabora.com Link: https://lkml.kernel.org/r/20211020174406.17889-14-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/entry/syscall_user_dispatch.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c index c240302f56e2..4508201847d2 100644 --- a/kernel/entry/syscall_user_dispatch.c +++ b/kernel/entry/syscall_user_dispatch.c @@ -47,14 +47,18 @@ bool syscall_user_dispatch(struct pt_regs *regs) * access_ok() is performed once, at prctl time, when * the selector is loaded by userspace. */ - if (unlikely(__get_user(state, sd->selector))) - do_exit(SIGSEGV); + if (unlikely(__get_user(state, sd->selector))) { + force_fatal_sig(SIGSEGV); + return true; + } if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW)) return false; - if (state != SYSCALL_DISPATCH_FILTER_BLOCK) - do_exit(SIGSYS); + if (state != SYSCALL_DISPATCH_FILTER_BLOCK) { + force_fatal_sig(SIGSYS); + return true; + } } sd->on_dispatch = true; From c317d306d55079525c9610267fdaf3a8a6d2f08b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:01 -0500 Subject: [PATCH 15/23] signal/sparc32: Exit with a fatal signal when try_to_clear_window_buffer fails The function try_to_clear_window_buffer is only called from rtrap_32.c. After it is called the signal pending state is retested, and signals are handled if TIF_SIGPENDING is set. 
This allows try_to_clear_window_buffer to call force_fatal_sig and then rely on the signal being delivered to kill the process, without any danger of returning to userspace, or otherwise using possibly corrupt state on failure. The functional difference between force_fatal_sig and do_exit is that do_exit will only terminate a single thread, and will never trigger a core-dump. A multi-threaded program for which a single thread terminates unexpectedly is hard to reason about. Calling force_fatal_sig does not give userspace a chance to catch the signal, but otherwise is an ordinary fatal signal exit, and it will trigger a coredump of the offending process if core dumps are enabled. Cc: David Miller Cc: sparclinux@vger.kernel.org Link: https://lkml.kernel.org/r/20211020174406.17889-15-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/sparc/kernel/windows.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c index 69a6ba6e9293..bbbd40cc6b28 100644 --- a/arch/sparc/kernel/windows.c +++ b/arch/sparc/kernel/windows.c @@ -121,8 +121,10 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who) if ((sp & 7) || copy_to_user((char __user *) sp, &tp->reg_window[window], - sizeof(struct reg_window32))) - do_exit(SIGILL); + sizeof(struct reg_window32))) { + force_fatal_sig(SIGILL); + return; + } } tp->w_saved = 0; } From 086ec444f86660e103de8945d0dcae9b67132ac9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:02 -0500 Subject: [PATCH 16/23] signal/sparc32: In setup_rt_frame and setup_frame use force_fatal_sig Modify the 32bit version of setup_rt_frame and setup_frame to act similar to the 64bit version of setup_rt_frame and fail with a signal instead of calling do_exit.
Replacing do_exit(SIGILL) with force_fatal_sig(SIGILL) ensures that the process will be terminated cleanly when the stack frame is invalid, instead of just killing off a single thread and leaving the process in a weird state. Cc: David Miller Cc: sparclinux@vger.kernel.org Link: https://lkml.kernel.org/r/20211020174406.17889-16-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- arch/sparc/kernel/signal_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 02f3ad55dfe3..cd677bc564a7 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -244,7 +244,7 @@ static int setup_frame(struct ksignal *ksig, struct pt_regs *regs, get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - do_exit(SIGILL); + force_fatal_sig(SIGILL); return -EINVAL; } @@ -336,7 +336,7 @@ static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, sf = (struct rt_signal_frame __user *) get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - do_exit(SIGILL); + force_fatal_sig(SIGILL); return -EINVAL; } From 695dd0d634df8903e5ead8aa08d326f63b23368a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:03 -0500 Subject: [PATCH 17/23] signal/x86: In emulate_vsyscall force a signal instead of calling do_exit Directly calling do_exit with a signal number has the problem that all of the side effects of the signal don't happen, such as killing all of the threads of a process instead of just the calling thread. So replace do_exit(SIGSYS) with force_fatal_sig(SIGSYS) which causes the signal handling to take its normal path and work as expected. Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20211020174406.17889-17-ebiederm@xmission.com Signed-off-by: Eric W.
Biederman --- arch/x86/entry/vsyscall/vsyscall_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 1b40b9297083..0b6b277ee050 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -226,7 +226,8 @@ bool emulate_vsyscall(unsigned long error_code, if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); - do_exit(SIGSYS); + force_fatal_sig(SIGSYS); + return true; } regs->orig_ax = -1; if (tmp) From 501c88722797a1923145658cce85fb3661121832 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:04 -0500 Subject: [PATCH 18/23] exit/rtl8723bs: Replace the macro thread_exit with a simple return 0 Every place thread_exit is called is at the end of a function started with kthread_run. The code in kthread_run has arranged things so a kernel thread can just return and do_exit will be called. So just have the threads return instead of calling complete_and_exit. Link: https://lkml.kernel.org/r/20211020174406.17889-18-ebiederm@xmission.com Signed-off-by: Eric W. 
Biederman --- drivers/staging/rtl8723bs/core/rtw_cmd.c | 2 +- drivers/staging/rtl8723bs/core/rtw_xmit.c | 2 +- drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c | 2 +- drivers/staging/rtl8723bs/include/osdep_service_linux.h | 2 -- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_cmd.c b/drivers/staging/rtl8723bs/core/rtw_cmd.c index d494c06dab96..8e69f9c10f5c 100644 --- a/drivers/staging/rtl8723bs/core/rtw_cmd.c +++ b/drivers/staging/rtl8723bs/core/rtw_cmd.c @@ -524,7 +524,7 @@ int rtw_cmd_thread(void *context) complete(&pcmdpriv->terminate_cmdthread_comp); atomic_set(&(pcmdpriv->cmdthd_running), false); - thread_exit(); + return 0; } /* diff --git a/drivers/staging/rtl8723bs/core/rtw_xmit.c b/drivers/staging/rtl8723bs/core/rtw_xmit.c index 79e4d7df1ef5..0c357bc2478c 100644 --- a/drivers/staging/rtl8723bs/core/rtw_xmit.c +++ b/drivers/staging/rtl8723bs/core/rtw_xmit.c @@ -2491,7 +2491,7 @@ int rtw_xmit_thread(void *context) complete(&padapter->xmitpriv.terminate_xmitthread_comp); - thread_exit(); + return 0; } void rtw_sctx_init(struct submit_ctx *sctx, int timeout_ms) diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c index 156d6aba18ca..2b9a41b12d1f 100644 --- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c +++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c @@ -435,7 +435,7 @@ int rtl8723bs_xmit_thread(void *context) complete(&pxmitpriv->SdioXmitTerminate); - thread_exit(); + return 0; } s32 rtl8723bs_mgnt_xmit( diff --git a/drivers/staging/rtl8723bs/include/osdep_service_linux.h b/drivers/staging/rtl8723bs/include/osdep_service_linux.h index 3492ec1efd1e..188ed7e26550 100644 --- a/drivers/staging/rtl8723bs/include/osdep_service_linux.h +++ b/drivers/staging/rtl8723bs/include/osdep_service_linux.h @@ -45,8 +45,6 @@ spinlock_t lock; }; - #define thread_exit() complete_and_exit(NULL, 0) - static inline struct list_head *get_next(struct list_head *list) { return 
list->next; From 99d7ef1e4792de3d8658f967539bdc6df2b03fa4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:05 -0500 Subject: [PATCH 19/23] exit/rtl8712: Replace the macro thread_exit with a simple return 0 The macro thread_exit is called at the end of a function started with kthread_run. The code in kthread_run has arranged things so a kernel thread can just return and do_exit will be called. So just have the cmd_thread return instead of calling complete_and_exit. Link: https://lkml.kernel.org/r/20211020174406.17889-19-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- drivers/staging/rtl8712/osdep_service.h | 1 - drivers/staging/rtl8712/rtl8712_cmd.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/rtl8712/osdep_service.h b/drivers/staging/rtl8712/osdep_service.h index d33ddffb7ad9..0d9bb42cbc58 100644 --- a/drivers/staging/rtl8712/osdep_service.h +++ b/drivers/staging/rtl8712/osdep_service.h @@ -37,7 +37,6 @@ struct __queue { #define _pkt struct sk_buff #define _buffer unsigned char -#define thread_exit() complete_and_exit(NULL, 0) #define _init_queue(pqueue) \ do { \ diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index e9294e1ed06e..2326aae6709e 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -393,7 +393,7 @@ int r8712_cmd_thread(void *context) r8712_free_cmd_obj(pcmd); } while (1); complete(&pcmdpriv->terminate_cmdthread_comp); - thread_exit(); + return 0; } void r8712_event_handle(struct _adapter *padapter, __le32 *peventbuf) From 0fdc0c4279c822eda8f5ce3b7689d34f4cac2e82 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:06 -0500 Subject: [PATCH 20/23] exit/r8188eu: Replace the macro thread_exit with a simple return 0 The macro thread_exit is called at the end of functions started with kthread_run.
The code in kthread_run has arranged things so a kernel thread can just return and do_exit will be called. So just have rtw_cmd_thread and mp_xmit_packet_thread return instead of calling complete_and_exit. Link: https://lkml.kernel.org/r/20211020174406.17889-20-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- drivers/staging/r8188eu/core/rtw_cmd.c | 2 +- drivers/staging/r8188eu/core/rtw_mp.c | 2 +- drivers/staging/r8188eu/include/osdep_service.h | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/r8188eu/core/rtw_cmd.c b/drivers/staging/r8188eu/core/rtw_cmd.c index ce73ac7cf973..d37c9463eecc 100644 --- a/drivers/staging/r8188eu/core/rtw_cmd.c +++ b/drivers/staging/r8188eu/core/rtw_cmd.c @@ -347,7 +347,7 @@ int rtw_cmd_thread(void *context) up(&pcmdpriv->terminate_cmdthread_sema); - thread_exit(); + return 0; } u8 rtw_setstandby_cmd(struct adapter *padapter, uint action) diff --git a/drivers/staging/r8188eu/core/rtw_mp.c b/drivers/staging/r8188eu/core/rtw_mp.c index dabdd0406f30..3945c4efe45a 100644 --- a/drivers/staging/r8188eu/core/rtw_mp.c +++ b/drivers/staging/r8188eu/core/rtw_mp.c @@ -580,7 +580,7 @@ static int mp_xmit_packet_thread(void *context) pmptx->pallocated_buf = NULL; pmptx->stop = 1; - thread_exit(); + return 0; } void fill_txdesc_for_mp(struct adapter *padapter, struct tx_desc *ptxdesc) diff --git a/drivers/staging/r8188eu/include/osdep_service.h b/drivers/staging/r8188eu/include/osdep_service.h index 029aa4e92c9b..afbffb551f9b 100644 --- a/drivers/staging/r8188eu/include/osdep_service.h +++ b/drivers/staging/r8188eu/include/osdep_service.h @@ -49,8 +49,6 @@ struct __queue { spinlock_t lock; }; -#define thread_exit() complete_and_exit(NULL, 0) - static inline struct list_head *get_list_head(struct __queue *queue) { return (&(queue->queue)); From e21294a7aaae32c5d7154b187113a04db5852e37 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Mon, 25 Oct 2021 10:50:57 -0500 Subject: [PATCH 21/23] signal: Replace force_sigsegv(SIGSEGV) with force_fatal_sig(SIGSEGV) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that force_fatal_sig exists it is unnecessary and a bit confusing to use force_sigsegv in cases where the simpler force_fatal_sig is wanted. So change every instance we can to make the code clearer. Acked-by: Geert Uytterhoeven Reviewed-by: Philippe Mathieu-Daudé Link: https://lkml.kernel.org/r/877de7jrev.fsf@disp2133 Signed-off-by: "Eric W. Biederman" --- arch/arc/kernel/process.c | 2 +- arch/m68k/kernel/traps.c | 2 +- arch/powerpc/kernel/signal_32.c | 2 +- arch/powerpc/kernel/signal_64.c | 4 ++-- arch/s390/kernel/traps.c | 2 +- arch/um/kernel/trap.c | 2 +- arch/x86/kernel/vm86_32.c | 2 +- fs/exec.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 3793876f42d9..8e90052f6f05 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -294,7 +294,7 @@ int elf_check_arch(const struct elf32_hdr *x) eflags = x->e_flags; if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) { pr_err("ABI mismatch - you need newer toolchain\n"); - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); return 0; } diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 5b19fcdcd69e..74045d164ddb 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -1150,7 +1150,7 @@ asmlinkage void set_esp0(unsigned long ssp) */ asmlinkage void fpsp040_die(void) { - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); } #ifdef CONFIG_M68KFPU_EMU diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 666f3da41232..933ab95805a6 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1063,7 +1063,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * We kill the task with a 
SIGSEGV in this situation. */ if (do_setcontext(new_ctx, regs, 0)) { - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); return -EFAULT; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index d8de622c9e4a..8ead9b3f47c6 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -704,7 +704,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, */ if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) { - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); return -EFAULT; } set_current_blocked(&set); @@ -713,7 +713,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, return -EFAULT; if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) { user_read_access_end(); - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); return -EFAULT; } user_read_access_end(); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 51729ea2cf8e..01a7c68dcfb6 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -84,7 +84,7 @@ static void default_trap_handler(struct pt_regs *regs) { if (user_mode(regs)) { report_user_fault(regs, SIGSEGV, 0); - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); } else die(regs, "Unknown program exception"); } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 3198c4767387..c32efb09db21 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -158,7 +158,7 @@ static void bad_segv(struct faultinfo fi, unsigned long ip) void fatal_sigsegv(void) { - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); do_signal(&current->thread.regs); /* * This is to tell gcc that we're not returning - do_signal diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 933cafab7832..f14f69d7aa3c 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -160,7 +160,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) user_access_end(); Efault: pr_alert("could not access userspace
vm86 info\n"); - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); goto exit_vm86; } diff --git a/fs/exec.c b/fs/exec.c index a098c133d8d7..ac7b51b51f38 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1852,7 +1852,7 @@ static int bprm_execve(struct linux_binprm *bprm, * SIGSEGV. */ if (bprm->point_of_no_return && !fatal_signal_pending(current)) - force_sigsegv(SIGSEGV); + force_fatal_sig(SIGSEGV); out_unmark: current->fs->in_exec = 0; From 00b06da29cf9dc633cdba87acd3f57f4df3fd5c7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 29 Oct 2021 09:14:19 -0500 Subject: [PATCH 22/23] signal: Add SA_IMMUTABLE to ensure forced signals do not get changed As Andy pointed out, there are races between force_sig_info_to_task and sigaction[1] when force_sig_info_to_task changes the signal's action. As Kees discovered[2] ptrace is also able to change these signals. In the case of seccomp killing a process with a signal it is a security violation to allow the signal to be caught or manipulated. Solve this problem by introducing a new flag SA_IMMUTABLE that prevents sigaction and ptrace from modifying these forced signals. This flag is carefully made kernel internal so that no new ABI is introduced. Longer term I think this can be solved by guaranteeing short circuit delivery of signals in this case. Unfortunately reliable and guaranteed short circuit delivery of these signals is still a ways off from being implemented, tested, and merged. So I have implemented a much simpler alternative for now. [1] https://lkml.kernel.org/r/b5d52d25-7bde-4030-a7b1-7c6f8ab90660@www.fastmail.com [2] https://lkml.kernel.org/r/202110281136.5CE65399A7@keescook Cc: stable@vger.kernel.org Fixes: 307d522f5eb8 ("signal/seccomp: Refactor seccomp signal and coredump generation") Tested-by: Andrea Righi Tested-by: Kees Cook Signed-off-by: "Eric W.
Biederman" --- include/linux/signal_types.h | 3 +++ include/uapi/asm-generic/signal-defs.h | 1 + kernel/signal.c | 8 +++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index 34cb28b8f16c..a70b2bdbf4d9 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -70,6 +70,9 @@ struct ksignal { int sig; }; +/* Used to kill the race between sigaction and forced signals */ +#define SA_IMMUTABLE 0x00800000 + #ifndef __ARCH_UAPI_SA_FLAGS #ifdef SA_RESTORER #define __ARCH_UAPI_SA_FLAGS SA_RESTORER diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h index fe929e7b77ca..7572f2f46ee8 100644 --- a/include/uapi/asm-generic/signal-defs.h +++ b/include/uapi/asm-generic/signal-defs.h @@ -45,6 +45,7 @@ #define SA_UNSUPPORTED 0x00000400 #define SA_EXPOSE_TAGBITS 0x00000800 /* 0x00010000 used on mips */ +/* 0x00800000 used for internal SA_IMMUTABLE */ /* 0x01000000 used on x86 */ /* 0x02000000 used on x86 */ /* diff --git a/kernel/signal.c b/kernel/signal.c index 6a5e1802b9a2..056a107e3cbc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1336,6 +1336,7 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool blocked = sigismember(&t->blocked, sig); if (blocked || ignored || sigdfl) { action->sa.sa_handler = SIG_DFL; + action->sa.sa_flags |= SA_IMMUTABLE; if (blocked) { sigdelset(&t->blocked, sig); recalc_sigpending_and_wake(t); @@ -2760,7 +2761,8 @@ bool get_signal(struct ksignal *ksig) if (!signr) break; /* will return 0 */ - if (unlikely(current->ptrace) && signr != SIGKILL) { + if (unlikely(current->ptrace) && (signr != SIGKILL) && + !(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) { signr = ptrace_signal(signr, &ksig->info); if (!signr) continue; @@ -4110,6 +4112,10 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) k = &p->sighand->action[sig-1]; 
spin_lock_irq(&p->sighand->siglock); + if (k->sa.sa_flags & SA_IMMUTABLE) { + spin_unlock_irq(&p->sighand->siglock); + return -EINVAL; + } if (oact) *oact = *k; From f91140e4553408cacd326624cd50fc367725e04a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Nov 2021 08:51:12 +0100 Subject: [PATCH 23/23] soc: ti: fix wkup_m3_rproc_boot_thread return type The wkup_m3_rproc_boot_thread() function uses a nonstandard prototype, which broke after Eric's recent cleanup: drivers/soc/ti/wkup_m3_ipc.c: In function 'wkup_m3_rproc_boot_thread': drivers/soc/ti/wkup_m3_ipc.c:429:16: error: 'return' with a value, in function returning void [-Werror=return-type] 429 | return 0; | ^ drivers/soc/ti/wkup_m3_ipc.c:416:13: note: declared here 416 | static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc) | ^~~~~~~~~~~~~~~~~~~~~~~~~ Change it to the normal prototype as it should have been from the start. Fixes: 111e70490d2a ("exit/kthread: Have kernel threads return instead of calling do_exit") Fixes: cdd5de500b2c ("soc: ti: Add wkup_m3_ipc driver") Signed-off-by: Arnd Bergmann Link: https://lkml.kernel.org/r/20211105075119.2327190-1-arnd@kernel.org Acked-by: Santosh Shilimkar Acked-by: Tony Lindgren Signed-off-by: Eric W. 
Biederman --- drivers/soc/ti/wkup_m3_ipc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index 0733443a2631..72386bd393fe 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -413,8 +413,9 @@ void wkup_m3_ipc_put(struct wkup_m3_ipc *m3_ipc) } EXPORT_SYMBOL_GPL(wkup_m3_ipc_put); -static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc) +static int wkup_m3_rproc_boot_thread(void *arg) { + struct wkup_m3_ipc *m3_ipc = arg; struct device *dev = m3_ipc->dev; int ret; @@ -500,7 +501,7 @@ static int wkup_m3_ipc_probe(struct platform_device *pdev) * can boot the wkup_m3 as soon as it's ready without holding * up kernel boot */ - task = kthread_run((void *)wkup_m3_rproc_boot_thread, m3_ipc, + task = kthread_run(wkup_m3_rproc_boot_thread, m3_ipc, "wkup_m3_rproc_loader"); if (IS_ERR(task)) {