linux/arch/arm/kernel/signal.c

728 lines
18 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/arch/arm/kernel/signal.c
*
* Copyright (C) 1995-2009 Russell King
*/
#include <linux/errno.h>
#include <linux/random.h>
#include <linux/signal.h>
#include <linux/personality.h>
#include <linux/uaccess.h>
#include <linux/tracehook.h>
#include <linux/uprobes.h>
arm/syscalls: Optimize address limit check Disable the generic address limit check in favor of an architecture specific optimized implementation. The generic implementation using pending work flags did not work well with ARM and alignment faults. The address limit is checked on each syscall return path to user-mode path as well as the irq user-mode return function. If the address limit was changed, a function is called to report data corruption (stopping the kernel or process based on configuration). The address limit check has to be done before any pending work because they can reset the address limit and the process is killed using a SIGKILL signal. For example the lkdtm address limit check does not work because the signal to kill the process will reset the user-mode address limit. Signed-off-by: Thomas Garnier <thgarnie@google.com> Signed-off-by: Kees Cook <keescook@chromium.org> Tested-by: Kees Cook <keescook@chromium.org> Tested-by: Leonard Crestez <leonard.crestez@nxp.com> Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Pratyush Anand <panand@redhat.com> Cc: Dave Martin <Dave.Martin@arm.com> Cc: Will Drewry <wad@chromium.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Russell King <linux@armlinux.org.uk> Cc: Andy Lutomirski <luto@amacapital.net> Cc: David Howells <dhowells@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: linux-api@vger.kernel.org Cc: Yonghong Song <yhs@fb.com> Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1504798247-48833-4-git-send-email-keescook@chromium.org
2017-09-07 23:30:46 +08:00
#include <linux/syscalls.h>
#include <asm/elf.h>
#include <asm/cacheflush.h>
#include <asm/traps.h>
#include <asm/unistd.h>
#include <asm/vfp.h>
#include "signal.h"
extern const unsigned long sigreturn_codes[17];
static unsigned long signal_return_offset;
#ifdef CONFIG_CRUNCH
static int preserve_crunch_context(struct crunch_sigframe __user *frame)
{
char kbuf[sizeof(*frame) + 8];
struct crunch_sigframe *kframe;
/* the crunch context must be 64 bit aligned */
kframe = (struct crunch_sigframe *)((unsigned long)(kbuf + 8) & ~7);
kframe->magic = CRUNCH_MAGIC;
kframe->size = CRUNCH_STORAGE_SIZE;
crunch_task_copy(current_thread_info(), &kframe->storage);
return __copy_to_user(frame, kframe, sizeof(*frame));
}
static int restore_crunch_context(char __user **auxp)
{
struct crunch_sigframe __user *frame =
(struct crunch_sigframe __user *)*auxp;
char kbuf[sizeof(*frame) + 8];
struct crunch_sigframe *kframe;
/* the crunch context must be 64 bit aligned */
kframe = (struct crunch_sigframe *)((unsigned long)(kbuf + 8) & ~7);
if (__copy_from_user(kframe, frame, sizeof(*frame)))
return -1;
if (kframe->magic != CRUNCH_MAGIC ||
kframe->size != CRUNCH_STORAGE_SIZE)
return -1;
*auxp += CRUNCH_STORAGE_SIZE;
crunch_task_restore(current_thread_info(), &kframe->storage);
return 0;
}
#endif
#ifdef CONFIG_IWMMXT
static int preserve_iwmmxt_context(struct iwmmxt_sigframe __user *frame)
{
char kbuf[sizeof(*frame) + 8];
struct iwmmxt_sigframe *kframe;
int err = 0;
/* the iWMMXt context must be 64 bit aligned */
kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
if (test_thread_flag(TIF_USING_IWMMXT)) {
kframe->magic = IWMMXT_MAGIC;
kframe->size = IWMMXT_STORAGE_SIZE;
iwmmxt_task_copy(current_thread_info(), &kframe->storage);
} else {
/*
* For bug-compatibility with older kernels, some space
* has to be reserved for iWMMXt even if it's not used.
* Set the magic and size appropriately so that properly
* written userspace can skip it reliably:
*/
*kframe = (struct iwmmxt_sigframe) {
.magic = DUMMY_MAGIC,
.size = IWMMXT_STORAGE_SIZE,
};
}
err = __copy_to_user(frame, kframe, sizeof(*kframe));
return err;
}
static int restore_iwmmxt_context(char __user **auxp)
{
struct iwmmxt_sigframe __user *frame =
(struct iwmmxt_sigframe __user *)*auxp;
char kbuf[sizeof(*frame) + 8];
struct iwmmxt_sigframe *kframe;
/* the iWMMXt context must be 64 bit aligned */
kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
if (__copy_from_user(kframe, frame, sizeof(*frame)))
return -1;
/*
* For non-iWMMXt threads: a single iwmmxt_sigframe-sized dummy
* block is discarded for compatibility with setup_sigframe() if
* present, but we don't mandate its presence. If some other
* magic is here, it's not for us:
*/
if (!test_thread_flag(TIF_USING_IWMMXT) &&
kframe->magic != DUMMY_MAGIC)
return 0;
if (kframe->size != IWMMXT_STORAGE_SIZE)
return -1;
if (test_thread_flag(TIF_USING_IWMMXT)) {
if (kframe->magic != IWMMXT_MAGIC)
return -1;
iwmmxt_task_restore(current_thread_info(), &kframe->storage);
}
*auxp += IWMMXT_STORAGE_SIZE;
return 0;
}
#endif
#ifdef CONFIG_VFP
static int preserve_vfp_context(struct vfp_sigframe __user *frame)
{
struct vfp_sigframe kframe;
int err = 0;
memset(&kframe, 0, sizeof(kframe));
kframe.magic = VFP_MAGIC;
kframe.size = VFP_STORAGE_SIZE;
err = vfp_preserve_user_clear_hwstate(&kframe.ufp, &kframe.ufp_exc);
if (err)
return err;
return __copy_to_user(frame, &kframe, sizeof(kframe));
}
static int restore_vfp_context(char __user **auxp)
{
struct vfp_sigframe frame;
int err;
err = __copy_from_user(&frame, *auxp, sizeof(frame));
if (err)
return err;
if (frame.magic != VFP_MAGIC || frame.size != VFP_STORAGE_SIZE)
return -EINVAL;
*auxp += sizeof(frame);
return vfp_restore_user_hwstate(&frame.ufp, &frame.ufp_exc);
}
#endif
/*
* Do a signal return; undo the signal stack. These are aligned to 64-bit.
*/
static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf)
{
struct sigcontext context;
char __user *aux;
sigset_t set;
int err;
err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
if (err == 0)
set_current_blocked(&set);
err |= __copy_from_user(&context, &sf->uc.uc_mcontext, sizeof(context));
if (err == 0) {
regs->ARM_r0 = context.arm_r0;
regs->ARM_r1 = context.arm_r1;
regs->ARM_r2 = context.arm_r2;
regs->ARM_r3 = context.arm_r3;
regs->ARM_r4 = context.arm_r4;
regs->ARM_r5 = context.arm_r5;
regs->ARM_r6 = context.arm_r6;
regs->ARM_r7 = context.arm_r7;
regs->ARM_r8 = context.arm_r8;
regs->ARM_r9 = context.arm_r9;
regs->ARM_r10 = context.arm_r10;
regs->ARM_fp = context.arm_fp;
regs->ARM_ip = context.arm_ip;
regs->ARM_sp = context.arm_sp;
regs->ARM_lr = context.arm_lr;
regs->ARM_pc = context.arm_pc;
regs->ARM_cpsr = context.arm_cpsr;
}
err |= !valid_user_regs(regs);
aux = (char __user *) sf->uc.uc_regspace;
#ifdef CONFIG_CRUNCH
if (err == 0)
err |= restore_crunch_context(&aux);
#endif
#ifdef CONFIG_IWMMXT
if (err == 0)
err |= restore_iwmmxt_context(&aux);
#endif
#ifdef CONFIG_VFP
if (err == 0)
err |= restore_vfp_context(&aux);
#endif
return err;
}
asmlinkage int sys_sigreturn(struct pt_regs *regs)
{
struct sigframe __user *frame;
/* Always make any pending restarted system calls return -EINTR */
all arches, signal: move restart_block to struct task_struct If an attacker can cause a controlled kernel stack overflow, overwriting the restart block is a very juicy exploit target. This is because the restart_block is held in the same memory allocation as the kernel stack. Moving the restart block to struct task_struct prevents this exploit by making the restart_block harder to locate. Note that there are other fields in thread_info that are also easy targets, at least on some architectures. It's also a decent simplification, since the restart code is more or less identical on all architectures. [james.hogan@imgtec.com: metag: align thread_info::supervisor_stack] Signed-off-by: Andy Lutomirski <luto@amacapital.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Kees Cook <keescook@chromium.org> Cc: David Miller <davem@davemloft.net> Acked-by: Richard Weinberger <richard@nod.at> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Matt Turner <mattst88@gmail.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Haavard Skinnemoen <hskinnemoen@gmail.com> Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no> Cc: Steven Miao <realmz6@gmail.com> Cc: Mark Salter <msalter@redhat.com> Cc: Aurelien Jacquiot <a-jacquiot@ti.com> Cc: Mikael Starvik <starvik@axis.com> Cc: Jesper Nilsson <jesper.nilsson@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Michal Simek <monstr@monstr.eu> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Jonas Bonn <jonas@southpole.se> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Helge Deller <deller@gmx.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Tested-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Chen Liqin <liqin.linux@gmail.com> Cc: Lennox Wu <lennox.wu@gmail.com> Cc: Chris Metcalf <cmetcalf@ezchip.com> Cc: Guan Xuetao <gxt@mprc.pku.edu.cn> Cc: Chris Zankel <chris@zankel.net> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Guenter Roeck <linux@roeck-us.net> Signed-off-by: James Hogan <james.hogan@imgtec.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-02-13 07:01:14 +08:00
current->restart_block.fn = do_no_restart_syscall;
/*
* Since we stacked the signal on a 64-bit boundary,
* then 'sp' should be word aligned here. If it's
* not, then the user is trying to mess with us.
*/
if (regs->ARM_sp & 7)
goto badframe;
frame = (struct sigframe __user *)regs->ARM_sp;
Remove 'type' argument from access_ok() function Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument of the user address range verification function since we got rid of the old racy i386-only code to walk page tables by hand. It existed because the original 80386 would not honor the write protect bit when in kernel mode, so you had to do COW by hand before doing any user access. But we haven't supported that in a long time, and these days the 'type' argument is a purely historical artifact. A discussion about extending 'user_access_begin()' to do the range checking resulted this patch, because there is no way we're going to move the old VERIFY_xyz interface to that model. And it's best done at the end of the merge window when I've done most of my merges, so let's just get this done once and for all. This patch was mostly done with a sed-script, with manual fix-ups for the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form. There were a couple of notable cases: - csky still had the old "verify_area()" name as an alias. - the iter_iov code had magical hardcoded knowledge of the actual values of VERIFY_{READ,WRITE} (not that they mattered, since nothing really used it) - microblaze used the type argument for a debug printout but other than those oddities this should be a total no-op patch. I tried to fix up all architectures, did fairly extensive grepping for access_ok() uses, and the changes are trivial, but I may have missed something. Any missed conversion should be trivially fixable, though. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-04 10:57:57 +08:00
if (!access_ok(frame, sizeof (*frame)))
goto badframe;
if (restore_sigframe(regs, frame))
goto badframe;
return regs->ARM_r0;
badframe:
force_sig(SIGSEGV);
return 0;
}
asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
/* Always make any pending restarted system calls return -EINTR */
all arches, signal: move restart_block to struct task_struct If an attacker can cause a controlled kernel stack overflow, overwriting the restart block is a very juicy exploit target. This is because the restart_block is held in the same memory allocation as the kernel stack. Moving the restart block to struct task_struct prevents this exploit by making the restart_block harder to locate. Note that there are other fields in thread_info that are also easy targets, at least on some architectures. It's also a decent simplification, since the restart code is more or less identical on all architectures. [james.hogan@imgtec.com: metag: align thread_info::supervisor_stack] Signed-off-by: Andy Lutomirski <luto@amacapital.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Kees Cook <keescook@chromium.org> Cc: David Miller <davem@davemloft.net> Acked-by: Richard Weinberger <richard@nod.at> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Matt Turner <mattst88@gmail.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Haavard Skinnemoen <hskinnemoen@gmail.com> Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no> Cc: Steven Miao <realmz6@gmail.com> Cc: Mark Salter <msalter@redhat.com> Cc: Aurelien Jacquiot <a-jacquiot@ti.com> Cc: Mikael Starvik <starvik@axis.com> Cc: Jesper Nilsson <jesper.nilsson@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Michal Simek <monstr@monstr.eu> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Jonas Bonn <jonas@southpole.se> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Helge Deller <deller@gmx.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Tested-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Chen Liqin <liqin.linux@gmail.com> Cc: Lennox Wu <lennox.wu@gmail.com> Cc: Chris Metcalf <cmetcalf@ezchip.com> Cc: Guan Xuetao <gxt@mprc.pku.edu.cn> Cc: Chris Zankel <chris@zankel.net> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Guenter Roeck <linux@roeck-us.net> Signed-off-by: James Hogan <james.hogan@imgtec.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-02-13 07:01:14 +08:00
current->restart_block.fn = do_no_restart_syscall;
/*
* Since we stacked the signal on a 64-bit boundary,
* then 'sp' should be word aligned here. If it's
* not, then the user is trying to mess with us.
*/
if (regs->ARM_sp & 7)
goto badframe;
frame = (struct rt_sigframe __user *)regs->ARM_sp;
Remove 'type' argument from access_ok() function Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument of the user address range verification function since we got rid of the old racy i386-only code to walk page tables by hand. It existed because the original 80386 would not honor the write protect bit when in kernel mode, so you had to do COW by hand before doing any user access. But we haven't supported that in a long time, and these days the 'type' argument is a purely historical artifact. A discussion about extending 'user_access_begin()' to do the range checking resulted this patch, because there is no way we're going to move the old VERIFY_xyz interface to that model. And it's best done at the end of the merge window when I've done most of my merges, so let's just get this done once and for all. This patch was mostly done with a sed-script, with manual fix-ups for the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form. There were a couple of notable cases: - csky still had the old "verify_area()" name as an alias. - the iter_iov code had magical hardcoded knowledge of the actual values of VERIFY_{READ,WRITE} (not that they mattered, since nothing really used it) - microblaze used the type argument for a debug printout but other than those oddities this should be a total no-op patch. I tried to fix up all architectures, did fairly extensive grepping for access_ok() uses, and the changes are trivial, but I may have missed something. Any missed conversion should be trivially fixable, though. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-04 10:57:57 +08:00
if (!access_ok(frame, sizeof (*frame)))
goto badframe;
if (restore_sigframe(regs, &frame->sig))
goto badframe;
if (restore_altstack(&frame->sig.uc.uc_stack))
goto badframe;
return regs->ARM_r0;
badframe:
force_sig(SIGSEGV);
return 0;
}
static int
setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set)
{
struct aux_sigframe __user *aux;
struct sigcontext context;
int err = 0;
context = (struct sigcontext) {
.arm_r0 = regs->ARM_r0,
.arm_r1 = regs->ARM_r1,
.arm_r2 = regs->ARM_r2,
.arm_r3 = regs->ARM_r3,
.arm_r4 = regs->ARM_r4,
.arm_r5 = regs->ARM_r5,
.arm_r6 = regs->ARM_r6,
.arm_r7 = regs->ARM_r7,
.arm_r8 = regs->ARM_r8,
.arm_r9 = regs->ARM_r9,
.arm_r10 = regs->ARM_r10,
.arm_fp = regs->ARM_fp,
.arm_ip = regs->ARM_ip,
.arm_sp = regs->ARM_sp,
.arm_lr = regs->ARM_lr,
.arm_pc = regs->ARM_pc,
.arm_cpsr = regs->ARM_cpsr,
.trap_no = current->thread.trap_no,
.error_code = current->thread.error_code,
.fault_address = current->thread.address,
.oldmask = set->sig[0],
};
err |= __copy_to_user(&sf->uc.uc_mcontext, &context, sizeof(context));
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
aux = (struct aux_sigframe __user *) sf->uc.uc_regspace;
#ifdef CONFIG_CRUNCH
if (err == 0)
err |= preserve_crunch_context(&aux->crunch);
#endif
#ifdef CONFIG_IWMMXT
if (err == 0)
err |= preserve_iwmmxt_context(&aux->iwmmxt);
#endif
#ifdef CONFIG_VFP
if (err == 0)
err |= preserve_vfp_context(&aux->vfp);
#endif
err |= __put_user(0, &aux->end_magic);
return err;
}
static inline void __user *
get_sigframe(struct ksignal *ksig, struct pt_regs *regs, int framesize)
{
unsigned long sp = sigsp(regs->ARM_sp, ksig);
void __user *frame;
/*
* ATPCS B01 mandates 8-byte alignment
*/
frame = (void __user *)((sp - framesize) & ~7);
/*
* Check that we can actually write to the signal frame.
*/
Remove 'type' argument from access_ok() function Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument of the user address range verification function since we got rid of the old racy i386-only code to walk page tables by hand. It existed because the original 80386 would not honor the write protect bit when in kernel mode, so you had to do COW by hand before doing any user access. But we haven't supported that in a long time, and these days the 'type' argument is a purely historical artifact. A discussion about extending 'user_access_begin()' to do the range checking resulted this patch, because there is no way we're going to move the old VERIFY_xyz interface to that model. And it's best done at the end of the merge window when I've done most of my merges, so let's just get this done once and for all. This patch was mostly done with a sed-script, with manual fix-ups for the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form. There were a couple of notable cases: - csky still had the old "verify_area()" name as an alias. - the iter_iov code had magical hardcoded knowledge of the actual values of VERIFY_{READ,WRITE} (not that they mattered, since nothing really used it) - microblaze used the type argument for a debug printout but other than those oddities this should be a total no-op patch. I tried to fix up all architectures, did fairly extensive grepping for access_ok() uses, and the changes are trivial, but I may have missed something. Any missed conversion should be trivially fixable, though. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-04 10:57:57 +08:00
if (!access_ok(frame, framesize))
frame = NULL;
return frame;
}
static int
setup_return(struct pt_regs *regs, struct ksignal *ksig,
unsigned long __user *rc, void __user *frame)
{
unsigned long handler = (unsigned long)ksig->ka.sa.sa_handler;
unsigned long handler_fdpic_GOT = 0;
unsigned long retcode;
unsigned int idx, thumb = 0;
unsigned long cpsr = regs->ARM_cpsr & ~(PSR_f | PSR_E_BIT);
bool fdpic = IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC) &&
(current->personality & FDPIC_FUNCPTRS);
if (fdpic) {
unsigned long __user *fdpic_func_desc =
(unsigned long __user *)handler;
if (__get_user(handler, &fdpic_func_desc[0]) ||
__get_user(handler_fdpic_GOT, &fdpic_func_desc[1]))
return 1;
}
cpsr |= PSR_ENDSTATE;
/*
* Maybe we need to deliver a 32-bit signal to a 26-bit task.
*/
if (ksig->ka.sa.sa_flags & SA_THIRTYTWO)
cpsr = (cpsr & ~MODE_MASK) | USR_MODE;
#ifdef CONFIG_ARM_THUMB
if (elf_hwcap & HWCAP_THUMB) {
/*
* The LSB of the handler determines if we're going to
* be using THUMB or ARM mode for this signal handler.
*/
thumb = handler & 1;
/*
* Clear the If-Then Thumb-2 execution state. ARM spec
* requires this to be all 000s in ARM mode. Snapdragon
* S4/Krait misbehaves on a Thumb=>ARM signal transition
* without this.
*
* We must do this whenever we are running on a Thumb-2
* capable CPU, which includes ARMv6T2. However, we elect
* to always do this to simplify the code; this field is
* marked UNK/SBZP for older architectures.
*/
cpsr &= ~PSR_IT_MASK;
if (thumb) {
cpsr |= PSR_T_BIT;
} else
cpsr &= ~PSR_T_BIT;
}
#endif
if (ksig->ka.sa.sa_flags & SA_RESTORER) {
retcode = (unsigned long)ksig->ka.sa.sa_restorer;
if (fdpic) {
/*
* We need code to load the function descriptor.
* That code follows the standard sigreturn code
* (6 words), and is made of 3 + 2 words for each
* variant. The 4th copied word is the actual FD
* address that the assembly code expects.
*/
idx = 6 + thumb * 3;
if (ksig->ka.sa.sa_flags & SA_SIGINFO)
idx += 5;
if (__put_user(sigreturn_codes[idx], rc ) ||
__put_user(sigreturn_codes[idx+1], rc+1) ||
__put_user(sigreturn_codes[idx+2], rc+2) ||
__put_user(retcode, rc+3))
return 1;
goto rc_finish;
}
} else {
idx = thumb << 1;
if (ksig->ka.sa.sa_flags & SA_SIGINFO)
idx += 3;
/*
* Put the sigreturn code on the stack no matter which return
* mechanism we use in order to remain ABI compliant
*/
if (__put_user(sigreturn_codes[idx], rc) ||
__put_user(sigreturn_codes[idx+1], rc+1))
return 1;
rc_finish:
#ifdef CONFIG_MMU
if (cpsr & MODE32_BIT) {
struct mm_struct *mm = current->mm;
/*
* 32-bit code can use the signal return page
* except when the MPU has protected the vectors
* page from PL0
*/
retcode = mm->context.sigpage + signal_return_offset +
(idx << 2) + thumb;
} else
#endif
{
/*
* Ensure that the instruction cache sees
* the return code written onto the stack.
*/
flush_icache_range((unsigned long)rc,
(unsigned long)(rc + 3));
retcode = ((unsigned long)rc) + thumb;
}
}
regs->ARM_r0 = ksig->sig;
regs->ARM_sp = (unsigned long)frame;
regs->ARM_lr = retcode;
regs->ARM_pc = handler;
if (fdpic)
regs->ARM_r9 = handler_fdpic_GOT;
regs->ARM_cpsr = cpsr;
return 0;
}
static int
setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
{
struct sigframe __user *frame = get_sigframe(ksig, regs, sizeof(*frame));
int err = 0;
if (!frame)
return 1;
/*
* Set uc.uc_flags to a value which sc.trap_no would never have.
*/
err = __put_user(0x5ac3c35a, &frame->uc.uc_flags);
err |= setup_sigframe(frame, regs, set);
if (err == 0)
err = setup_return(regs, ksig, frame->retcode, frame);
return err;
}
static int
setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame = get_sigframe(ksig, regs, sizeof(*frame));
int err = 0;
if (!frame)
return 1;
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
err |= __put_user(0, &frame->sig.uc.uc_flags);
err |= __put_user(NULL, &frame->sig.uc.uc_link);
err |= __save_altstack(&frame->sig.uc.uc_stack, regs->ARM_sp);
err |= setup_sigframe(&frame->sig, regs, set);
if (err == 0)
err = setup_return(regs, ksig, frame->sig.retcode, frame);
if (err == 0) {
/*
* For realtime signals we must also set the second and third
* arguments for the signal handler.
* -- Peter Maydell <pmaydell@chiark.greenend.org.uk> 2000-12-06
*/
regs->ARM_r1 = (unsigned long)&frame->info;
regs->ARM_r2 = (unsigned long)&frame->sig.uc;
}
return err;
}
/*
* OK, we're invoking a handler
*/
static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
sigset_t *oldset = sigmask_to_save();
int ret;
/*
* Perform fixup for the pre-signal frame.
*/
rseq_signal_deliver(ksig, regs);
/*
* Set up the stack frame
*/
if (ksig->ka.sa.sa_flags & SA_SIGINFO)
ret = setup_rt_frame(ksig, oldset, regs);
else
ret = setup_frame(ksig, oldset, regs);
/*
* Check that the resulting registers are actually sane.
*/
ret |= !valid_user_regs(regs);
signal_setup_done(ret, ksig, 0);
}
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
* mistake.
*
* Note that we go through the signals twice: once to check the signals that
* the kernel can handle, and then we build all the user-level signal handling
* stack-frames in one go after that.
*/
static int do_signal(struct pt_regs *regs, int syscall)
{
ARM: 6892/1: handle ptrace requests to change PC during interrupted system calls GDB's interrupt.exp test cases currenly fail on ARM. The problem is how do_signal handled restarting interrupted system calls: The entry.S assembler code determines that we come from a system call; and that information is passed as "syscall" parameter to do_signal. That routine then calls get_signal_to_deliver [*] and if a signal is to be delivered, calls into handle_signal. If a system call is to be restarted either after the signal handler returns, or if no handler is to be called in the first place, the PC is updated after the get_signal_to_deliver call, either in handle_signal (if we have a handler) or at the end of do_signal (otherwise). Now the problem is that during [*], the call to get_signal_to_deliver, a ptrace intercept may happen. During this intercept, the debugger may change registers, including the PC. This is done by GDB if it wants to execute an "inferior call", i.e. the execution of some code in the debugged program triggered by GDB. To this purpose, GDB will save all registers, allocate a stack frame, set up PC and arguments as appropriate for the call, and point the link register to a dummy breakpoint instruction. Once the process is restarted, it will execute the call and then trap back to the debugger, at which point GDB will restore all registers and continue original execution. This generally works fine. However, now consider what happens when GDB attempts to do exactly that while the process was interrupted during execution of a to-be- restarted system call: do_signal is called with the syscall flag set; it calls get_signal_to_deliver, at which point the debugger takes over and changes the PC to point to a completely different place. Now get_signal_to_deliver returns without a signal to deliver; but now do_signal decides it should be restarting a system call, and decrements the PC by 2 or 4 -- so it now points to 2 or 4 bytes before the function GDB wants to call -- which leads to a subsequent crash. To fix this problem, two things need to be supported: - do_signal must be able to recognize that get_signal_to_deliver changed the PC to a different location, and skip the restart-syscall sequence - once the debugger has restored all registers at the end of the inferior call sequence, do_signal must recognize that *now* it needs to restart the pending system call, even though it was now entered from a breakpoint instead of an actual svc instruction This set of issues is solved on other platforms, usually by one of two mechanisms: - The status information "do_signal is handling a system call that may need restarting" is itself carried in some register that can be accessed via ptrace. This is e.g. on Intel the "orig_eax" register; on Sparc the kernel defines a magic extra bit in the flags register for this purpose. This allows GDB to manage that state: reset it when doing an inferior call, and restore it after the call is finished. - On s390, do_signal transparently handles this problem without requiring GDB interaction, by performing system call restarting in the following way: first, adjust the PC as necessary for restarting the call. Then, call get_signal_to_deliver; and finally just continue execution at the PC. This way, if GDB does not change the PC, everything is as before. If GDB *does* change the PC, execution will simply continue there -- and once GDB restores the PC it saved at that point, it will automatically point to the *restarted* system call. (There is the minor twist how to handle system calls that do *not* need restarting -- do_signal will undo the PC change in this case, after get_signal_to_deliver has returned, and only if ptrace did not change the PC during that call.) Because there does not appear to be any obvious register to carry the syscall-restart information on ARM, we'd either have to introduce a new artificial ptrace register just for that purpose, or else handle the issue transparently like on s390. The patch below implements the second option; using this patch makes the interrupt.exp test cases pass on ARM, with no regression in the GDB test suite otherwise. Cc: patches@linaro.org Signed-off-by: Ulrich Weigand <ulrich.weigand@linaro.org> Signed-off-by: Arnd Bergmann <arnd.bergmann@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2011-05-04 01:32:55 +08:00
unsigned int retval = 0, continue_addr = 0, restart_addr = 0;
struct ksignal ksig;
int restart = 0;
ARM: 6892/1: handle ptrace requests to change PC during interrupted system calls GDB's interrupt.exp test cases currenly fail on ARM. The problem is how do_signal handled restarting interrupted system calls: The entry.S assembler code determines that we come from a system call; and that information is passed as "syscall" parameter to do_signal. That routine then calls get_signal_to_deliver [*] and if a signal is to be delivered, calls into handle_signal. If a system call is to be restarted either after the signal handler returns, or if no handler is to be called in the first place, the PC is updated after the get_signal_to_deliver call, either in handle_signal (if we have a handler) or at the end of do_signal (otherwise). Now the problem is that during [*], the call to get_signal_to_deliver, a ptrace intercept may happen. During this intercept, the debugger may change registers, including the PC. This is done by GDB if it wants to execute an "inferior call", i.e. the execution of some code in the debugged program triggered by GDB. To this purpose, GDB will save all registers, allocate a stack frame, set up PC and arguments as appropriate for the call, and point the link register to a dummy breakpoint instruction. Once the process is restarted, it will execute the call and then trap back to the debugger, at which point GDB will restore all registers and continue original execution. This generally works fine. However, now consider what happens when GDB attempts to do exactly that while the process was interrupted during execution of a to-be- restarted system call: do_signal is called with the syscall flag set; it calls get_signal_to_deliver, at which point the debugger takes over and changes the PC to point to a completely different place. Now get_signal_to_deliver returns without a signal to deliver; but now do_signal decides it should be restarting a system call, and decrements the PC by 2 or 4 -- so it now points to 2 or 4 bytes before the function GDB wants to call -- which leads to a subsequent crash. To fix this problem, two things need to be supported: - do_signal must be able to recognize that get_signal_to_deliver changed the PC to a different location, and skip the restart-syscall sequence - once the debugger has restored all registers at the end of the inferior call sequence, do_signal must recognize that *now* it needs to restart the pending system call, even though it was now entered from a breakpoint instead of an actual svc instruction This set of issues is solved on other platforms, usually by one of two mechanisms: - The status information "do_signal is handling a system call that may need restarting" is itself carried in some register that can be accessed via ptrace. This is e.g. on Intel the "orig_eax" register; on Sparc the kernel defines a magic extra bit in the flags register for this purpose. This allows GDB to manage that state: reset it when doing an inferior call, and restore it after the call is finished. - On s390, do_signal transparently handles this problem without requiring GDB interaction, by performing system call restarting in the following way: first, adjust the PC as necessary for restarting the call. Then, call get_signal_to_deliver; and finally just continue execution at the PC. This way, if GDB does not change the PC, everything is as before. If GDB *does* change the PC, execution will simply continue there -- and once GDB restores the PC it saved at that point, it will automatically point to the *restarted* system call. (There is the minor twist how to handle system calls that do *not* need restarting -- do_signal will undo the PC change in this case, after get_signal_to_deliver has returned, and only if ptrace did not change the PC during that call.) Because there does not appear to be any obvious register to carry the syscall-restart information on ARM, we'd either have to introduce a new artificial ptrace register just for that purpose, or else handle the issue transparently like on s390. The patch below implements the second option; using this patch makes the interrupt.exp test cases pass on ARM, with no regression in the GDB test suite otherwise. Cc: patches@linaro.org Signed-off-by: Ulrich Weigand <ulrich.weigand@linaro.org> Signed-off-by: Arnd Bergmann <arnd.bergmann@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2011-05-04 01:32:55 +08:00
/*
* If we were from a system call, check for system call restarting...
*/
if (syscall) {
continue_addr = regs->ARM_pc;
restart_addr = continue_addr - (thumb_mode(regs) ? 2 : 4);
retval = regs->ARM_r0;
/*
* Prepare for system call restart. We do this here so that a
* debugger will see the already changed PSW.
*/
switch (retval) {
case -ERESTART_RESTARTBLOCK:
restart -= 2;
fallthrough;
ARM: 6892/1: handle ptrace requests to change PC during interrupted system calls GDB's interrupt.exp test cases currenly fail on ARM. The problem is how do_signal handled restarting interrupted system calls: The entry.S assembler code determines that we come from a system call; and that information is passed as "syscall" parameter to do_signal. That routine then calls get_signal_to_deliver [*] and if a signal is to be delivered, calls into handle_signal. If a system call is to be restarted either after the signal handler returns, or if no handler is to be called in the first place, the PC is updated after the get_signal_to_deliver call, either in handle_signal (if we have a handler) or at the end of do_signal (otherwise). Now the problem is that during [*], the call to get_signal_to_deliver, a ptrace intercept may happen. During this intercept, the debugger may change registers, including the PC. This is done by GDB if it wants to execute an "inferior call", i.e. the execution of some code in the debugged program triggered by GDB. To this purpose, GDB will save all registers, allocate a stack frame, set up PC and arguments as appropriate for the call, and point the link register to a dummy breakpoint instruction. Once the process is restarted, it will execute the call and then trap back to the debugger, at which point GDB will restore all registers and continue original execution. This generally works fine. However, now consider what happens when GDB attempts to do exactly that while the process was interrupted during execution of a to-be- restarted system call: do_signal is called with the syscall flag set; it calls get_signal_to_deliver, at which point the debugger takes over and changes the PC to point to a completely different place. Now get_signal_to_deliver returns without a signal to deliver; but now do_signal decides it should be restarting a system call, and decrements the PC by 2 or 4 -- so it now points to 2 or 4 bytes before the function GDB wants to call -- which leads to a subsequent crash. To fix this problem, two things need to be supported: - do_signal must be able to recognize that get_signal_to_deliver changed the PC to a different location, and skip the restart-syscall sequence - once the debugger has restored all registers at the end of the inferior call sequence, do_signal must recognize that *now* it needs to restart the pending system call, even though it was now entered from a breakpoint instead of an actual svc instruction This set of issues is solved on other platforms, usually by one of two mechanisms: - The status information "do_signal is handling a system call that may need restarting" is itself carried in some register that can be accessed via ptrace. This is e.g. on Intel the "orig_eax" register; on Sparc the kernel defines a magic extra bit in the flags register for this purpose. This allows GDB to manage that state: reset it when doing an inferior call, and restore it after the call is finished. - On s390, do_signal transparently handles this problem without requiring GDB interaction, by performing system call restarting in the following way: first, adjust the PC as necessary for restarting the call. Then, call get_signal_to_deliver; and finally just continue execution at the PC. This way, if GDB does not change the PC, everything is as before. If GDB *does* change the PC, execution will simply continue there -- and once GDB restores the PC it saved at that point, it will automatically point to the *restarted* system call. (There is the minor twist how to handle system calls that do *not* need restarting -- do_signal will undo the PC change in this case, after get_signal_to_deliver has returned, and only if ptrace did not change the PC during that call.) Because there does not appear to be any obvious register to carry the syscall-restart information on ARM, we'd either have to introduce a new artificial ptrace register just for that purpose, or else handle the issue transparently like on s390. The patch below implements the second option; using this patch makes the interrupt.exp test cases pass on ARM, with no regression in the GDB test suite otherwise. Cc: patches@linaro.org Signed-off-by: Ulrich Weigand <ulrich.weigand@linaro.org> Signed-off-by: Arnd Bergmann <arnd.bergmann@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2011-05-04 01:32:55 +08:00
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
restart++;
ARM: 6892/1: handle ptrace requests to change PC during interrupted system calls GDB's interrupt.exp test cases currenly fail on ARM. The problem is how do_signal handled restarting interrupted system calls: The entry.S assembler code determines that we come from a system call; and that information is passed as "syscall" parameter to do_signal. That routine then calls get_signal_to_deliver [*] and if a signal is to be delivered, calls into handle_signal. If a system call is to be restarted either after the signal handler returns, or if no handler is to be called in the first place, the PC is updated after the get_signal_to_deliver call, either in handle_signal (if we have a handler) or at the end of do_signal (otherwise). Now the problem is that during [*], the call to get_signal_to_deliver, a ptrace intercept may happen. During this intercept, the debugger may change registers, including the PC. This is done by GDB if it wants to execute an "inferior call", i.e. the execution of some code in the debugged program triggered by GDB. To this purpose, GDB will save all registers, allocate a stack frame, set up PC and arguments as appropriate for the call, and point the link register to a dummy breakpoint instruction. Once the process is restarted, it will execute the call and then trap back to the debugger, at which point GDB will restore all registers and continue original execution. This generally works fine. However, now consider what happens when GDB attempts to do exactly that while the process was interrupted during execution of a to-be- restarted system call: do_signal is called with the syscall flag set; it calls get_signal_to_deliver, at which point the debugger takes over and changes the PC to point to a completely different place. Now get_signal_to_deliver returns without a signal to deliver; but now do_signal decides it should be restarting a system call, and decrements the PC by 2 or 4 -- so it now points to 2 or 4 bytes before the function GDB wants to call -- which leads to a subsequent crash. To fix this problem, two things need to be supported: - do_signal must be able to recognize that get_signal_to_deliver changed the PC to a different location, and skip the restart-syscall sequence - once the debugger has restored all registers at the end of the inferior call sequence, do_signal must recognize that *now* it needs to restart the pending system call, even though it was now entered from a breakpoint instead of an actual svc instruction This set of issues is solved on other platforms, usually by one of two mechanisms: - The status information "do_signal is handling a system call that may need restarting" is itself carried in some register that can be accessed via ptrace. This is e.g. on Intel the "orig_eax" register; on Sparc the kernel defines a magic extra bit in the flags register for this purpose. This allows GDB to manage that state: reset it when doing an inferior call, and restore it after the call is finished. - On s390, do_signal transparently handles this problem without requiring GDB interaction, by performing system call restarting in the following way: first, adjust the PC as necessary for restarting the call. Then, call get_signal_to_deliver; and finally just continue execution at the PC. This way, if GDB does not change the PC, everything is as before. If GDB *does* change the PC, execution will simply continue there -- and once GDB restores the PC it saved at that point, it will automatically point to the *restarted* system call. (There is the minor twist how to handle system calls that do *not* need restarting -- do_signal will undo the PC change in this case, after get_signal_to_deliver has returned, and only if ptrace did not change the PC during that call.) Because there does not appear to be any obvious register to carry the syscall-restart information on ARM, we'd either have to introduce a new artificial ptrace register just for that purpose, or else handle the issue transparently like on s390. The patch below implements the second option; using this patch makes the interrupt.exp test cases pass on ARM, with no regression in the GDB test suite otherwise. Cc: patches@linaro.org Signed-off-by: Ulrich Weigand <ulrich.weigand@linaro.org> Signed-off-by: Arnd Bergmann <arnd.bergmann@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2011-05-04 01:32:55 +08:00
regs->ARM_r0 = regs->ARM_ORIG_r0;
regs->ARM_pc = restart_addr;
break;
}
}
/*
* Get the signal to deliver. When running under ptrace, at this
* point the debugger may change all our registers ...
*/
/*
* Depending on the signal settings we may need to revert the
* decision to restart the system call. But skip this if a
* debugger has chosen to restart at a different PC.
*/
if (get_signal(&ksig)) {
/* handler */
if (unlikely(restart) && regs->ARM_pc == restart_addr) {
if (retval == -ERESTARTNOHAND ||
retval == -ERESTART_RESTARTBLOCK
ARM: 6892/1: handle ptrace requests to change PC during interrupted system calls GDB's interrupt.exp test cases currenly fail on ARM. The problem is how do_signal handled restarting interrupted system calls: The entry.S assembler code determines that we come from a system call; and that information is passed as "syscall" parameter to do_signal. That routine then calls get_signal_to_deliver [*] and if a signal is to be delivered, calls into handle_signal. If a system call is to be restarted either after the signal handler returns, or if no handler is to be called in the first place, the PC is updated after the get_signal_to_deliver call, either in handle_signal (if we have a handler) or at the end of do_signal (otherwise). Now the problem is that during [*], the call to get_signal_to_deliver, a ptrace intercept may happen. During this intercept, the debugger may change registers, including the PC. This is done by GDB if it wants to execute an "inferior call", i.e. the execution of some code in the debugged program triggered by GDB. To this purpose, GDB will save all registers, allocate a stack frame, set up PC and arguments as appropriate for the call, and point the link register to a dummy breakpoint instruction. Once the process is restarted, it will execute the call and then trap back to the debugger, at which point GDB will restore all registers and continue original execution. This generally works fine. However, now consider what happens when GDB attempts to do exactly that while the process was interrupted during execution of a to-be- restarted system call: do_signal is called with the syscall flag set; it calls get_signal_to_deliver, at which point the debugger takes over and changes the PC to point to a completely different place. Now get_signal_to_deliver returns without a signal to deliver; but now do_signal decides it should be restarting a system call, and decrements the PC by 2 or 4 -- so it now points to 2 or 4 bytes before the function GDB wants to call -- which leads to a subsequent crash. To fix this problem, two things need to be supported: - do_signal must be able to recognize that get_signal_to_deliver changed the PC to a different location, and skip the restart-syscall sequence - once the debugger has restored all registers at the end of the inferior call sequence, do_signal must recognize that *now* it needs to restart the pending system call, even though it was now entered from a breakpoint instead of an actual svc instruction This set of issues is solved on other platforms, usually by one of two mechanisms: - The status information "do_signal is handling a system call that may need restarting" is itself carried in some register that can be accessed via ptrace. This is e.g. on Intel the "orig_eax" register; on Sparc the kernel defines a magic extra bit in the flags register for this purpose. This allows GDB to manage that state: reset it when doing an inferior call, and restore it after the call is finished. - On s390, do_signal transparently handles this problem without requiring GDB interaction, by performing system call restarting in the following way: first, adjust the PC as necessary for restarting the call. Then, call get_signal_to_deliver; and finally just continue execution at the PC. This way, if GDB does not change the PC, everything is as before. If GDB *does* change the PC, execution will simply continue there -- and once GDB restores the PC it saved at that point, it will automatically point to the *restarted* system call. (There is the minor twist how to handle system calls that do *not* need restarting -- do_signal will undo the PC change in this case, after get_signal_to_deliver has returned, and only if ptrace did not change the PC during that call.) Because there does not appear to be any obvious register to carry the syscall-restart information on ARM, we'd either have to introduce a new artificial ptrace register just for that purpose, or else handle the issue transparently like on s390. The patch below implements the second option; using this patch makes the interrupt.exp test cases pass on ARM, with no regression in the GDB test suite otherwise. Cc: patches@linaro.org Signed-off-by: Ulrich Weigand <ulrich.weigand@linaro.org> Signed-off-by: Arnd Bergmann <arnd.bergmann@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2011-05-04 01:32:55 +08:00
|| (retval == -ERESTARTSYS
&& !(ksig.ka.sa.sa_flags & SA_RESTART))) {
ARM: 6892/1: handle ptrace requests to change PC during interrupted system calls GDB's interrupt.exp test cases currenly fail on ARM. The problem is how do_signal handled restarting interrupted system calls: The entry.S assembler code determines that we come from a system call; and that information is passed as "syscall" parameter to do_signal. That routine then calls get_signal_to_deliver [*] and if a signal is to be delivered, calls into handle_signal. If a system call is to be restarted either after the signal handler returns, or if no handler is to be called in the first place, the PC is updated after the get_signal_to_deliver call, either in handle_signal (if we have a handler) or at the end of do_signal (otherwise). Now the problem is that during [*], the call to get_signal_to_deliver, a ptrace intercept may happen. During this intercept, the debugger may change registers, including the PC. This is done by GDB if it wants to execute an "inferior call", i.e. the execution of some code in the debugged program triggered by GDB. To this purpose, GDB will save all registers, allocate a stack frame, set up PC and arguments as appropriate for the call, and point the link register to a dummy breakpoint instruction. Once the process is restarted, it will execute the call and then trap back to the debugger, at which point GDB will restore all registers and continue original execution. This generally works fine. However, now consider what happens when GDB attempts to do exactly that while the process was interrupted during execution of a to-be- restarted system call: do_signal is called with the syscall flag set; it calls get_signal_to_deliver, at which point the debugger takes over and changes the PC to point to a completely different place. Now get_signal_to_deliver returns without a signal to deliver; but now do_signal decides it should be restarting a system call, and decrements the PC by 2 or 4 -- so it now points to 2 or 4 bytes before the function GDB wants to call -- which leads to a subsequent crash. To fix this problem, two things need to be supported: - do_signal must be able to recognize that get_signal_to_deliver changed the PC to a different location, and skip the restart-syscall sequence - once the debugger has restored all registers at the end of the inferior call sequence, do_signal must recognize that *now* it needs to restart the pending system call, even though it was now entered from a breakpoint instead of an actual svc instruction This set of issues is solved on other platforms, usually by one of two mechanisms: - The status information "do_signal is handling a system call that may need restarting" is itself carried in some register that can be accessed via ptrace. This is e.g. on Intel the "orig_eax" register; on Sparc the kernel defines a magic extra bit in the flags register for this purpose. This allows GDB to manage that state: reset it when doing an inferior call, and restore it after the call is finished. - On s390, do_signal transparently handles this problem without requiring GDB interaction, by performing system call restarting in the following way: first, adjust the PC as necessary for restarting the call. Then, call get_signal_to_deliver; and finally just continue execution at the PC. This way, if GDB does not change the PC, everything is as before. If GDB *does* change the PC, execution will simply continue there -- and once GDB restores the PC it saved at that point, it will automatically point to the *restarted* system call. (There is the minor twist how to handle system calls that do *not* need restarting -- do_signal will undo the PC change in this case, after get_signal_to_deliver has returned, and only if ptrace did not change the PC during that call.) Because there does not appear to be any obvious register to carry the syscall-restart information on ARM, we'd either have to introduce a new artificial ptrace register just for that purpose, or else handle the issue transparently like on s390. The patch below implements the second option; using this patch makes the interrupt.exp test cases pass on ARM, with no regression in the GDB test suite otherwise. Cc: patches@linaro.org Signed-off-by: Ulrich Weigand <ulrich.weigand@linaro.org> Signed-off-by: Arnd Bergmann <arnd.bergmann@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2011-05-04 01:32:55 +08:00
regs->ARM_r0 = -EINTR;
regs->ARM_pc = continue_addr;
}
}
handle_signal(&ksig, regs);
} else {
/* no handler */
restore_saved_sigmask();
if (unlikely(restart) && regs->ARM_pc == restart_addr) {
regs->ARM_pc = continue_addr;
return restart;
}
}
return 0;
}
asmlinkage int
do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
{
/*
* The assembly code enters us with IRQs off, but it hasn't
* informed the tracing code of that for efficiency reasons.
* Update the trace code with the current status.
*/
trace_hardirqs_off();
do {
if (likely(thread_flags & _TIF_NEED_RESCHED)) {
schedule();
} else {
if (unlikely(!user_mode(regs)))
return 0;
local_irq_enable();
if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
int restart = do_signal(regs, syscall);
if (unlikely(restart)) {
/*
* Restart without handlers.
* Deal with it without leaving
* the kernel space.
*/
return restart;
}
syscall = 0;
} else if (thread_flags & _TIF_UPROBE) {
uprobe_notify_resume(regs);
} else {
tracehook_notify_resume(regs);
rseq_handle_notify_resume(NULL, regs);
}
}
local_irq_disable();
thread_flags = current_thread_info()->flags;
} while (thread_flags & _TIF_WORK_MASK);
return 0;
}
struct page *get_signal_page(void)
{
unsigned long ptr;
unsigned offset;
struct page *page;
void *addr;
page = alloc_pages(GFP_KERNEL, 0);
if (!page)
return NULL;
addr = page_address(page);
/* Poison the entire page */
memset32(addr, __opcode_to_mem_arm(0xe7fddef1),
PAGE_SIZE / sizeof(u32));
/* Give the signal return code some randomness */
offset = 0x200 + (get_random_int() & 0x7fc);
signal_return_offset = offset;
/* Copy signal return handlers into the page */
memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes));
/* Flush out all instructions in this page */
ptr = (unsigned long)addr;
flush_icache_range(ptr, ptr + PAGE_SIZE);
return page;
}
arm/syscalls: Optimize address limit check Disable the generic address limit check in favor of an architecture specific optimized implementation. The generic implementation using pending work flags did not work well with ARM and alignment faults. The address limit is checked on each syscall return path to user-mode path as well as the irq user-mode return function. If the address limit was changed, a function is called to report data corruption (stopping the kernel or process based on configuration). The address limit check has to be done before any pending work because they can reset the address limit and the process is killed using a SIGKILL signal. For example the lkdtm address limit check does not work because the signal to kill the process will reset the user-mode address limit. Signed-off-by: Thomas Garnier <thgarnie@google.com> Signed-off-by: Kees Cook <keescook@chromium.org> Tested-by: Kees Cook <keescook@chromium.org> Tested-by: Leonard Crestez <leonard.crestez@nxp.com> Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Pratyush Anand <panand@redhat.com> Cc: Dave Martin <Dave.Martin@arm.com> Cc: Will Drewry <wad@chromium.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Russell King <linux@armlinux.org.uk> Cc: Andy Lutomirski <luto@amacapital.net> Cc: David Howells <dhowells@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: linux-api@vger.kernel.org Cc: Yonghong Song <yhs@fb.com> Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1504798247-48833-4-git-send-email-keescook@chromium.org
2017-09-07 23:30:46 +08:00
/* Defer to generic check */
asmlinkage void addr_limit_check_failed(void)
{
#ifdef CONFIG_MMU
arm/syscalls: Optimize address limit check Disable the generic address limit check in favor of an architecture specific optimized implementation. The generic implementation using pending work flags did not work well with ARM and alignment faults. The address limit is checked on each syscall return path to user-mode path as well as the irq user-mode return function. If the address limit was changed, a function is called to report data corruption (stopping the kernel or process based on configuration). The address limit check has to be done before any pending work because they can reset the address limit and the process is killed using a SIGKILL signal. For example the lkdtm address limit check does not work because the signal to kill the process will reset the user-mode address limit. Signed-off-by: Thomas Garnier <thgarnie@google.com> Signed-off-by: Kees Cook <keescook@chromium.org> Tested-by: Kees Cook <keescook@chromium.org> Tested-by: Leonard Crestez <leonard.crestez@nxp.com> Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Pratyush Anand <panand@redhat.com> Cc: Dave Martin <Dave.Martin@arm.com> Cc: Will Drewry <wad@chromium.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Russell King <linux@armlinux.org.uk> Cc: Andy Lutomirski <luto@amacapital.net> Cc: David Howells <dhowells@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: linux-api@vger.kernel.org Cc: Yonghong Song <yhs@fb.com> Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1504798247-48833-4-git-send-email-keescook@chromium.org
2017-09-07 23:30:46 +08:00
addr_limit_user_check();
#endif
arm/syscalls: Optimize address limit check Disable the generic address limit check in favor of an architecture specific optimized implementation. The generic implementation using pending work flags did not work well with ARM and alignment faults. The address limit is checked on each syscall return path to user-mode path as well as the irq user-mode return function. If the address limit was changed, a function is called to report data corruption (stopping the kernel or process based on configuration). The address limit check has to be done before any pending work because they can reset the address limit and the process is killed using a SIGKILL signal. For example the lkdtm address limit check does not work because the signal to kill the process will reset the user-mode address limit. Signed-off-by: Thomas Garnier <thgarnie@google.com> Signed-off-by: Kees Cook <keescook@chromium.org> Tested-by: Kees Cook <keescook@chromium.org> Tested-by: Leonard Crestez <leonard.crestez@nxp.com> Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Pratyush Anand <panand@redhat.com> Cc: Dave Martin <Dave.Martin@arm.com> Cc: Will Drewry <wad@chromium.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Russell King <linux@armlinux.org.uk> Cc: Andy Lutomirski <luto@amacapital.net> Cc: David Howells <dhowells@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: linux-api@vger.kernel.org Cc: Yonghong Song <yhs@fb.com> Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1504798247-48833-4-git-send-email-keescook@chromium.org
2017-09-07 23:30:46 +08:00
}
#ifdef CONFIG_DEBUG_RSEQ
asmlinkage void do_rseq_syscall(struct pt_regs *regs)
{
rseq_syscall(regs);
}
#endif