linux_old1/arch/powerpc/kernel/signal_32.c

1561 lines
44 KiB
C
Raw Normal View History

/*
* Signal handling for 32bit PPC and 32bit tasks on 64bit PPC
*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
* Copyright (C) 2001 IBM
* Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
*
* Derived from "arch/i386/kernel/signal.c"
* Copyright (C) 1991, 1992 Linus Torvalds
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/elf.h>
#include <linux/ptrace.h>
#include <linux/ratelimit.h>
#ifdef CONFIG_PPC64
#include <linux/syscalls.h>
#include <linux/compat.h>
#else
#include <linux/wait.h>
#include <linux/unistd.h>
#include <linux/stddef.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
#endif
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/syscalls.h>
#include <asm/sigcontext.h>
#include <asm/vdso.h>
#include <asm/switch_to.h>
#include <asm/tm.h>
#ifdef CONFIG_PPC64
#include "ppc32.h"
#include <asm/unistd.h>
#else
#include <asm/ucontext.h>
#include <asm/pgtable.h>
#endif
#include "signal.h"
#ifdef CONFIG_PPC64
#define sys_rt_sigreturn compat_sys_rt_sigreturn
#define sys_swapcontext compat_sys_swapcontext
#define sys_sigreturn compat_sys_sigreturn
#define old_sigaction old_sigaction32
#define sigcontext sigcontext32
#define mcontext mcontext32
#define ucontext ucontext32
#define __save_altstack __compat_save_altstack
/*
* Userspace code may pass a ucontext which doesn't include VSX added
* at the end. We need to check for this case.
*/
#define UCONTEXTSIZEWITHOUTVSX \
(sizeof(struct ucontext) - sizeof(elf_vsrreghalf_t32))
/*
* Returning 0 means we return to userspace via
* ret_from_except and thus restore all user
* registers from *regs. This is what we need
* to do when a signal has been delivered.
*/
#define GP_REGS_SIZE min(sizeof(elf_gregset_t32), sizeof(struct pt_regs32))
#undef __SIGNAL_FRAMESIZE
#define __SIGNAL_FRAMESIZE __SIGNAL_FRAMESIZE32
#undef ELF_NVRREG
#define ELF_NVRREG ELF_NVRREG32
/*
* Functions for flipping sigsets (thanks to brain dead generic
* implementation that makes things simple for little endian only)
*/
static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
{
compat_sigset_t cset;
switch (_NSIG_WORDS) {
case 4: cset.sig[6] = set->sig[3] & 0xffffffffull;
cset.sig[7] = set->sig[3] >> 32;
case 3: cset.sig[4] = set->sig[2] & 0xffffffffull;
cset.sig[5] = set->sig[2] >> 32;
case 2: cset.sig[2] = set->sig[1] & 0xffffffffull;
cset.sig[3] = set->sig[1] >> 32;
case 1: cset.sig[0] = set->sig[0] & 0xffffffffull;
cset.sig[1] = set->sig[0] >> 32;
}
return copy_to_user(uset, &cset, sizeof(*uset));
}
static inline int get_sigset_t(sigset_t *set,
const compat_sigset_t __user *uset)
{
compat_sigset_t s32;
if (copy_from_user(&s32, uset, sizeof(*uset)))
return -EFAULT;
/*
* Swap the 2 words of the 64-bit sigset_t (they are stored
* in the "wrong" endian in 32-bit user storage).
*/
switch (_NSIG_WORDS) {
case 4: set->sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
case 3: set->sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
case 2: set->sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
case 1: set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
}
return 0;
}
#define to_user_ptr(p) ptr_to_compat(p)
#define from_user_ptr(p) compat_ptr(p)
static inline int save_general_regs(struct pt_regs *regs,
struct mcontext __user *frame)
{
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
int i;
powerpc: Fix various syscall/signal/swapcontext bugs A careful reading of the recent changes to the system call entry/exit paths revealed several problems, plus some things that could be simplified and improved: * 32-bit wasn't testing the _TIF_NOERROR bit in the syscall fast exit path, so it was only doing anything with it once it saw some other bit being set. In other words, the noerror behaviour would apply to the next system call where we had to reschedule or deliver a signal, which is not necessarily the current system call. * 32-bit wasn't doing the call to ptrace_notify in the syscall exit path when the _TIF_SINGLESTEP bit was set. * _TIF_RESTOREALL was in both _TIF_USER_WORK_MASK and _TIF_PERSYSCALL_MASK, which is odd since _TIF_RESTOREALL is only set by system calls. I took it out of _TIF_USER_WORK_MASK. * On 64-bit, _TIF_RESTOREALL wasn't causing the non-volatile registers to be restored (unless perhaps a signal was delivered or the syscall was traced or single-stepped). Thus the non-volatile registers weren't restored on exit from a signal handler. We probably got away with it mostly because signal handlers written in C wouldn't alter the non-volatile registers. * On 32-bit I simplified the code and made it more like 64-bit by making the syscall exit path jump to ret_from_except to handle preemption and signal delivery. * 32-bit was calling do_signal unnecessarily when _TIF_RESTOREALL was set - but I think because of that 32-bit was actually restoring the non-volatile registers on exit from a signal handler. * I changed the order of enabling interrupts and saving the non-volatile registers before calling do_syscall_trace_leave; now we enable interrupts first. Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-03-08 10:24:22 +08:00
WARN_ON(!FULL_REGS(regs));
[PATCH] syscall entry/exit revamp This cleanup patch speeds up the null syscall path on ppc64 by about 3%, and brings the ppc32 and ppc64 code slightly closer together. The ppc64 code was checking current_thread_info()->flags twice in the syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after disabling interrupts. Now we do the same as ppc32 -- check the flags only once in the fast path, and re-enable interrupts if necessary in the ptrace case. The patch abolishes the 'syscall_noerror' member of struct thread_info and replaces it with a TIF_NOERROR bit in the flags, which is handled in the slow path. This shortens the syscall entry code, which no longer needs to clear syscall_noerror. The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow path to save the non-volatile GPRs into a signal frame. This removes the need for the assembly wrappers around sys_sigsuspend(), sys_rt_sigsuspend(), et al which existed solely to save those registers in advance. It also means I don't have to add new wrappers for ppoll() and pselect(), which is what I was supposed to be doing when I got distracted into this... Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit directly into a signal handler (as required by sigsuspend et al) by introducing a TIF_RESTOREALL flag which causes _all_ the registers to be reloaded from the pt_regs by taking the ret_from_exception path, instead of the normal syscall exit path which stomps on the callee-saved GPRs. It appears to pass an LTP test run on ppc64, and passes basic testing on ppc32 too. Brief tests of ptrace functionality with strace and gdb also appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :) Signed-off-by: David Woodhouse <dwmw2@infradead.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
for (i = 0; i <= PT_RESULT; i ++) {
if (i == 14 && !FULL_REGS(regs))
i = 32;
if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
return -EFAULT;
[PATCH] syscall entry/exit revamp This cleanup patch speeds up the null syscall path on ppc64 by about 3%, and brings the ppc32 and ppc64 code slightly closer together. The ppc64 code was checking current_thread_info()->flags twice in the syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after disabling interrupts. Now we do the same as ppc32 -- check the flags only once in the fast path, and re-enable interrupts if necessary in the ptrace case. The patch abolishes the 'syscall_noerror' member of struct thread_info and replaces it with a TIF_NOERROR bit in the flags, which is handled in the slow path. This shortens the syscall entry code, which no longer needs to clear syscall_noerror. The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow path to save the non-volatile GPRs into a signal frame. This removes the need for the assembly wrappers around sys_sigsuspend(), sys_rt_sigsuspend(), et al which existed solely to save those registers in advance. It also means I don't have to add new wrappers for ppoll() and pselect(), which is what I was supposed to be doing when I got distracted into this... Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit directly into a signal handler (as required by sigsuspend et al) by introducing a TIF_RESTOREALL flag which causes _all_ the registers to be reloaded from the pt_regs by taking the ret_from_exception path, instead of the normal syscall exit path which stomps on the callee-saved GPRs. It appears to pass an LTP test run on ppc64, and passes basic testing on ppc32 too. Brief tests of ptrace functionality with strace and gdb also appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :) Signed-off-by: David Woodhouse <dwmw2@infradead.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
}
return 0;
}
static inline int restore_general_regs(struct pt_regs *regs,
struct mcontext __user *sr)
{
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
int i;
for (i = 0; i <= PT_RESULT; i++) {
if ((i == PT_MSR) || (i == PT_SOFTE))
continue;
if (__get_user(gregs[i], &sr->mc_gregs[i]))
return -EFAULT;
}
return 0;
}
#else /* CONFIG_PPC64 */
#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set)
{
return copy_to_user(uset, set, sizeof(*uset));
}
static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset)
{
return copy_from_user(set, uset, sizeof(*uset));
}
#define to_user_ptr(p) ((unsigned long)(p))
#define from_user_ptr(p) ((void __user *)(p))
static inline int save_general_regs(struct pt_regs *regs,
struct mcontext __user *frame)
{
powerpc: Fix various syscall/signal/swapcontext bugs A careful reading of the recent changes to the system call entry/exit paths revealed several problems, plus some things that could be simplified and improved: * 32-bit wasn't testing the _TIF_NOERROR bit in the syscall fast exit path, so it was only doing anything with it once it saw some other bit being set. In other words, the noerror behaviour would apply to the next system call where we had to reschedule or deliver a signal, which is not necessarily the current system call. * 32-bit wasn't doing the call to ptrace_notify in the syscall exit path when the _TIF_SINGLESTEP bit was set. * _TIF_RESTOREALL was in both _TIF_USER_WORK_MASK and _TIF_PERSYSCALL_MASK, which is odd since _TIF_RESTOREALL is only set by system calls. I took it out of _TIF_USER_WORK_MASK. * On 64-bit, _TIF_RESTOREALL wasn't causing the non-volatile registers to be restored (unless perhaps a signal was delivered or the syscall was traced or single-stepped). Thus the non-volatile registers weren't restored on exit from a signal handler. We probably got away with it mostly because signal handlers written in C wouldn't alter the non-volatile registers. * On 32-bit I simplified the code and made it more like 64-bit by making the syscall exit path jump to ret_from_except to handle preemption and signal delivery. * 32-bit was calling do_signal unnecessarily when _TIF_RESTOREALL was set - but I think because of that 32-bit was actually restoring the non-volatile registers on exit from a signal handler. * I changed the order of enabling interrupts and saving the non-volatile registers before calling do_syscall_trace_leave; now we enable interrupts first. Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-03-08 10:24:22 +08:00
WARN_ON(!FULL_REGS(regs));
return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE);
}
static inline int restore_general_regs(struct pt_regs *regs,
struct mcontext __user *sr)
{
/* copy up to but not including MSR */
if (__copy_from_user(regs, &sr->mc_gregs,
PT_MSR * sizeof(elf_greg_t)))
return -EFAULT;
/* copy from orig_r3 (the word after the MSR) up to the end */
if (__copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t)))
return -EFAULT;
return 0;
}
#endif
/*
* When we have signals to deliver, we set up on the
* user stack, going down from the original stack pointer:
* an ABI gap of 56 words
* an mcontext struct
* a sigcontext struct
* a gap of __SIGNAL_FRAMESIZE bytes
*
* Each of these things must be a multiple of 16 bytes in size. The following
* structure represent all of this except the __SIGNAL_FRAMESIZE gap
*
*/
struct sigframe {
struct sigcontext sctx; /* the sigcontext */
struct mcontext mctx; /* all the register values */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
struct sigcontext sctx_transact;
struct mcontext mctx_transact;
#endif
/*
* Programs using the rs6000/xcoff abi can save up to 19 gp
* regs and 18 fp regs below sp before decrementing it.
*/
int abigap[56];
};
/* We use the mc_pad field for the signal return trampoline. */
#define tramp mc_pad
/*
* When we have rt signals to deliver, we set up on the
* user stack, going down from the original stack pointer:
* one rt_sigframe struct (siginfo + ucontext + ABI gap)
* a gap of __SIGNAL_FRAMESIZE+16 bytes
* (the +16 is to get the siginfo and ucontext in the same
* positions as in older kernels).
*
* Each of these things must be a multiple of 16 bytes in size.
*
*/
struct rt_sigframe {
#ifdef CONFIG_PPC64
compat_siginfo_t info;
#else
struct siginfo info;
#endif
struct ucontext uc;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
struct ucontext uc_transact;
#endif
/*
* Programs using the rs6000/xcoff abi can save up to 19 gp
* regs and 18 fp regs below sp before decrementing it.
*/
int abigap[56];
};
#ifdef CONFIG_VSX
unsigned long copy_fpr_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NFPREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
buf[i] = task->thread.TS_FPR(i);
buf[i] = task->thread.fp_state.fpscr;
return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
}
unsigned long copy_fpr_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NFPREG];
int i;
if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
return 1;
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
task->thread.TS_FPR(i) = buf[i];
task->thread.fp_state.fpscr = buf[i];
return 0;
}
unsigned long copy_vsx_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NVSRHALFREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < ELF_NVSRHALFREG; i++)
buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
}
unsigned long copy_vsx_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NVSRHALFREG];
int i;
if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
return 1;
for (i = 0; i < ELF_NVSRHALFREG ; i++)
task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
return 0;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
unsigned long copy_transact_fpr_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NFPREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
buf[i] = task->thread.TS_TRANS_FPR(i);
buf[i] = task->thread.transact_fp.fpscr;
return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
}
unsigned long copy_transact_fpr_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NFPREG];
int i;
if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
return 1;
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
task->thread.TS_TRANS_FPR(i) = buf[i];
task->thread.transact_fp.fpscr = buf[i];
return 0;
}
unsigned long copy_transact_vsx_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NVSRHALFREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < ELF_NVSRHALFREG; i++)
buf[i] = task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET];
return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
}
unsigned long copy_transact_vsx_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NVSRHALFREG];
int i;
if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
return 1;
for (i = 0; i < ELF_NVSRHALFREG ; i++)
task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = buf[i];
return 0;
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
#else
inline unsigned long copy_fpr_to_user(void __user *to,
struct task_struct *task)
{
return __copy_to_user(to, task->thread.fp_state.fpr,
ELF_NFPREG * sizeof(double));
}
inline unsigned long copy_fpr_from_user(struct task_struct *task,
void __user *from)
{
return __copy_from_user(task->thread.fp_state.fpr, from,
ELF_NFPREG * sizeof(double));
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
inline unsigned long copy_transact_fpr_to_user(void __user *to,
struct task_struct *task)
{
return __copy_to_user(to, task->thread.transact_fp.fpr,
ELF_NFPREG * sizeof(double));
}
inline unsigned long copy_transact_fpr_from_user(struct task_struct *task,
void __user *from)
{
return __copy_from_user(task->thread.transact_fp.fpr, from,
ELF_NFPREG * sizeof(double));
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
#endif
/*
* Save the current user registers on the user stack.
* We only save the altivec/spe registers if the process has used
* altivec/spe instructions at some point.
*/
static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
struct mcontext __user *tm_frame, int sigret,
int ctx_has_vsx_region)
{
unsigned long msr = regs->msr;
/* Make sure floating point registers are stored in regs */
flush_fp_to_thread(current);
powerpc: Introduce VSX thread_struct and CONFIG_VSX The layout of the new VSR registers and how they overlap on top of the legacy FPR and VR registers is: VSR doubleword 0 VSR doubleword 1 ---------------------------------------------------------------- VSR[0] | FPR[0] | | ---------------------------------------------------------------- VSR[1] | FPR[1] | | ---------------------------------------------------------------- | ... | | | ... | | ---------------------------------------------------------------- VSR[30] | FPR[30] | | ---------------------------------------------------------------- VSR[31] | FPR[31] | | ---------------------------------------------------------------- VSR[32] | VR[0] | ---------------------------------------------------------------- VSR[33] | VR[1] | ---------------------------------------------------------------- | ... | | ... | ---------------------------------------------------------------- VSR[62] | VR[30] | ---------------------------------------------------------------- VSR[63] | VR[31] | ---------------------------------------------------------------- VSX has 64 128bit registers. The first 32 regs overlap with the FP registers and hence extend them with and additional 64 bits. The second 32 regs overlap with the VMX registers. This commit introduces the thread_struct changes required to reflect this register layout. Ptrace and signals code is updated so that the floating point registers are correctly accessed from the thread_struct when CONFIG_VSX is enabled. Signed-off-by: Michael Neuling <mikey@neuling.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-06-25 12:07:18 +08:00
/* save general registers */
if (save_general_regs(regs, frame))
return 1;
#ifdef CONFIG_ALTIVEC
/* save altivec registers */
if (current->thread.used_vr) {
flush_altivec_to_thread(current);
if (__copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
ELF_NVRREG * sizeof(vector128)))
return 1;
/* set MSR_VEC in the saved MSR value to indicate that
frame->mc_vregs contains valid data */
msr |= MSR_VEC;
}
/* else assert((regs->msr & MSR_VEC) == 0) */
/* We always copy to/from vrsave, it's 0 if we don't have or don't
* use altivec. Since VSCR only contains 32 bits saved in the least
* significant bits of a vector, we "cheat" and stuff VRSAVE in the
* most significant bits of that same vector. --BenH
* Note that the current VRSAVE value is in the SPR at this point.
*/
if (cpu_has_feature(CPU_FTR_ALTIVEC))
current->thread.vrsave = mfspr(SPRN_VRSAVE);
if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32]))
return 1;
#endif /* CONFIG_ALTIVEC */
if (copy_fpr_to_user(&frame->mc_fregs, current))
powerpc: Introduce VSX thread_struct and CONFIG_VSX The layout of the new VSR registers and how they overlap on top of the legacy FPR and VR registers is: VSR doubleword 0 VSR doubleword 1 ---------------------------------------------------------------- VSR[0] | FPR[0] | | ---------------------------------------------------------------- VSR[1] | FPR[1] | | ---------------------------------------------------------------- | ... | | | ... | | ---------------------------------------------------------------- VSR[30] | FPR[30] | | ---------------------------------------------------------------- VSR[31] | FPR[31] | | ---------------------------------------------------------------- VSR[32] | VR[0] | ---------------------------------------------------------------- VSR[33] | VR[1] | ---------------------------------------------------------------- | ... | | ... | ---------------------------------------------------------------- VSR[62] | VR[30] | ---------------------------------------------------------------- VSR[63] | VR[31] | ---------------------------------------------------------------- VSX has 64 128bit registers. The first 32 regs overlap with the FP registers and hence extend them with and additional 64 bits. The second 32 regs overlap with the VMX registers. This commit introduces the thread_struct changes required to reflect this register layout. Ptrace and signals code is updated so that the floating point registers are correctly accessed from the thread_struct when CONFIG_VSX is enabled. Signed-off-by: Michael Neuling <mikey@neuling.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-06-25 12:07:18 +08:00
return 1;
/*
* Clear the MSR VSX bit to indicate there is no valid state attached
* to this context, except in the specific case below where we set it.
*/
msr &= ~MSR_VSX;
#ifdef CONFIG_VSX
/*
* Copy VSR 0-31 upper half from thread_struct to local
* buffer, then write that to userspace. Also set MSR_VSX in
* the saved MSR value to indicate that frame->mc_vregs
* contains valid data
*/
if (current->thread.used_vsr && ctx_has_vsx_region) {
__giveup_vsx(current);
if (copy_vsx_to_user(&frame->mc_vsregs, current))
return 1;
msr |= MSR_VSX;
}
powerpc: Introduce VSX thread_struct and CONFIG_VSX The layout of the new VSR registers and how they overlap on top of the legacy FPR and VR registers is: VSR doubleword 0 VSR doubleword 1 ---------------------------------------------------------------- VSR[0] | FPR[0] | | ---------------------------------------------------------------- VSR[1] | FPR[1] | | ---------------------------------------------------------------- | ... | | | ... | | ---------------------------------------------------------------- VSR[30] | FPR[30] | | ---------------------------------------------------------------- VSR[31] | FPR[31] | | ---------------------------------------------------------------- VSR[32] | VR[0] | ---------------------------------------------------------------- VSR[33] | VR[1] | ---------------------------------------------------------------- | ... | | ... | ---------------------------------------------------------------- VSR[62] | VR[30] | ---------------------------------------------------------------- VSR[63] | VR[31] | ---------------------------------------------------------------- VSX has 64 128bit registers. The first 32 regs overlap with the FP registers and hence extend them with and additional 64 bits. The second 32 regs overlap with the VMX registers. This commit introduces the thread_struct changes required to reflect this register layout. Ptrace and signals code is updated so that the floating point registers are correctly accessed from the thread_struct when CONFIG_VSX is enabled. Signed-off-by: Michael Neuling <mikey@neuling.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-06-25 12:07:18 +08:00
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
/* save spe registers */
if (current->thread.used_spe) {
flush_spe_to_thread(current);
if (__copy_to_user(&frame->mc_vregs, current->thread.evr,
ELF_NEVRREG * sizeof(u32)))
return 1;
/* set MSR_SPE in the saved MSR value to indicate that
frame->mc_vregs contains valid data */
msr |= MSR_SPE;
}
/* else assert((regs->msr & MSR_SPE) == 0) */
/* We always copy to/from spefscr */
if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG))
return 1;
#endif /* CONFIG_SPE */
if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
return 1;
/* We need to write 0 the MSR top 32 bits in the tm frame so that we
* can check it on the restore to see if TM is active
*/
if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR]))
return 1;
if (sigret) {
/* Set up the sigreturn trampoline: li r0,sigret; sc */
if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
|| __put_user(0x44000002UL, &frame->tramp[1]))
return 1;
flush_icache_range((unsigned long) &frame->tramp[0],
(unsigned long) &frame->tramp[2]);
}
return 0;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Save the current user registers on the user stack.
* We only save the altivec/spe registers if the process has used
* altivec/spe instructions at some point.
* We also save the transactional registers to a second ucontext in the
* frame.
*
* See save_user_regs() and signal_64.c:setup_tm_sigcontexts().
*/
static int save_tm_user_regs(struct pt_regs *regs,
struct mcontext __user *frame,
struct mcontext __user *tm_frame, int sigret)
{
unsigned long msr = regs->msr;
powerpc: Don't corrupt transactional state when using FP/VMX in kernel Currently, when we have a process using the transactional memory facilities on POWER8 (that is, the processor is in transactional or suspended state), and the process enters the kernel and the kernel then uses the floating-point or vector (VMX/Altivec) facility, we end up corrupting the user-visible FP/VMX/VSX state. This happens, for example, if a page fault causes a copy-on-write operation, because the copy_page function will use VMX to do the copy on POWER8. The test program below demonstrates the bug. The bug happens because when FP/VMX state for a transactional process is stored in the thread_struct, we store the checkpointed state in .fp_state/.vr_state and the transactional (current) state in .transact_fp/.transact_vr. However, when the kernel wants to use FP/VMX, it calls enable_kernel_fp() or enable_kernel_altivec(), which saves the current state in .fp_state/.vr_state. Furthermore, when we return to the user process we return with FP/VMX/VSX disabled. The next time the process uses FP/VMX/VSX, we don't know which set of state (the current register values, .fp_state/.vr_state, or .transact_fp/.transact_vr) we should be using, since we have no way to tell if we are still in the same transaction, and if not, whether the previous transaction succeeded or failed. Thus it is necessary to strictly adhere to the rule that if FP has been enabled at any point in a transaction, we must keep FP enabled for the user process with the current transactional state in the FP registers, until we detect that it is no longer in a transaction. Similarly for VMX; once enabled it must stay enabled until the process is no longer transactional. In order to keep this rule, we add a new thread_info flag which we test when returning from the kernel to userspace, called TIF_RESTORE_TM. This flag indicates that there is FP/VMX/VSX state to be restored before entering userspace, and when it is set the .tm_orig_msr field in the thread_struct indicates what state needs to be restored. The restoration is done by restore_tm_state(). The TIF_RESTORE_TM bit is set by new giveup_fpu/altivec_maybe_transactional helpers, which are called from enable_kernel_fp/altivec, giveup_vsx, and flush_fp/altivec_to_thread instead of giveup_fpu/altivec. The other thing to be done is to get the transactional FP/VMX/VSX state from .fp_state/.vr_state when doing reclaim, if that state has been saved there by giveup_fpu/altivec_maybe_transactional. Having done this, we set the FP/VMX bit in the thread's MSR after reclaim to indicate that that part of the state is now valid (having been reclaimed from the processor's checkpointed state). Finally, in the signal handling code, we move the clearing of the transactional state bits in the thread's MSR a bit earlier, before calling flush_fp_to_thread(), so that we don't unnecessarily set the TIF_RESTORE_TM bit. This is the test program: /* Michael Neuling 4/12/2013 * * See if the altivec state is leaked out of an aborted transaction due to * kernel vmx copy loops. * * gcc -m64 htm_vmxcopy.c -o htm_vmxcopy * */ /* We don't use all of these, but for reference: */ int main(int argc, char *argv[]) { long double vecin = 1.3; long double vecout; unsigned long pgsize = getpagesize(); int i; int fd; int size = pgsize*16; char tmpfile[] = "/tmp/page_faultXXXXXX"; char buf[pgsize]; char *a; uint64_t aborted = 0; fd = mkstemp(tmpfile); assert(fd >= 0); memset(buf, 0, pgsize); for (i = 0; i < size; i += pgsize) assert(write(fd, buf, pgsize) == pgsize); unlink(tmpfile); a = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); assert(a != MAP_FAILED); asm __volatile__( "lxvd2x 40,0,%[vecinptr] ; " // set 40 to initial value TBEGIN "beq 3f ;" TSUSPEND "xxlxor 40,40,40 ; " // set 40 to 0 "std 5, 0(%[map]) ;" // cause kernel vmx copy page TABORT TRESUME TEND "li %[res], 0 ;" "b 5f ;" "3: ;" // Abort handler "li %[res], 1 ;" "5: ;" "stxvd2x 40,0,%[vecoutptr] ; " : [res]"=r"(aborted) : [vecinptr]"r"(&vecin), [vecoutptr]"r"(&vecout), [map]"r"(a) : "memory", "r0", "r3", "r4", "r5", "r6", "r7"); if (aborted && (vecin != vecout)){ printf("FAILED: vector state leaked on abort %f != %f\n", (double)vecin, (double)vecout); exit(1); } munmap(a, size); close(fd); printf("PASSED!\n"); return 0; } Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2014-01-13 12:56:29 +08:00
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
* just indicates to userland that we were doing a transaction, but we
* don't want to return in transactional state. This also ensures
* that flush_fp_to_thread won't set TIF_RESTORE_TM again.
*/
regs->msr &= ~MSR_TS_MASK;
/* Make sure floating point registers are stored in regs */
flush_fp_to_thread(current);
/* Save both sets of general registers */
if (save_general_regs(&current->thread.ckpt_regs, frame)
|| save_general_regs(regs, tm_frame))
return 1;
/* Stash the top half of the 64bit MSR into the 32bit MSR word
* of the transactional mcontext. This way we have a backward-compatible
* MSR in the 'normal' (checkpointed) mcontext and additionally one can
* also look at what type of transaction (T or S) was active at the
* time of the signal.
*/
if (__put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR]))
return 1;
#ifdef CONFIG_ALTIVEC
/* save altivec registers */
if (current->thread.used_vr) {
flush_altivec_to_thread(current);
if (__copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
ELF_NVRREG * sizeof(vector128)))
return 1;
if (msr & MSR_VEC) {
if (__copy_to_user(&tm_frame->mc_vregs,
&current->thread.transact_vr,
ELF_NVRREG * sizeof(vector128)))
return 1;
} else {
if (__copy_to_user(&tm_frame->mc_vregs,
&current->thread.vr_state,
ELF_NVRREG * sizeof(vector128)))
return 1;
}
/* set MSR_VEC in the saved MSR value to indicate that
* frame->mc_vregs contains valid data
*/
msr |= MSR_VEC;
}
/* We always copy to/from vrsave, it's 0 if we don't have or don't
* use altivec. Since VSCR only contains 32 bits saved in the least
* significant bits of a vector, we "cheat" and stuff VRSAVE in the
* most significant bits of that same vector. --BenH
*/
if (cpu_has_feature(CPU_FTR_ALTIVEC))
current->thread.vrsave = mfspr(SPRN_VRSAVE);
if (__put_user(current->thread.vrsave,
(u32 __user *)&frame->mc_vregs[32]))
return 1;
if (msr & MSR_VEC) {
if (__put_user(current->thread.transact_vrsave,
(u32 __user *)&tm_frame->mc_vregs[32]))
return 1;
} else {
if (__put_user(current->thread.vrsave,
(u32 __user *)&tm_frame->mc_vregs[32]))
return 1;
}
#endif /* CONFIG_ALTIVEC */
if (copy_fpr_to_user(&frame->mc_fregs, current))
return 1;
if (msr & MSR_FP) {
if (copy_transact_fpr_to_user(&tm_frame->mc_fregs, current))
return 1;
} else {
if (copy_fpr_to_user(&tm_frame->mc_fregs, current))
return 1;
}
#ifdef CONFIG_VSX
/*
* Copy VSR 0-31 upper half from thread_struct to local
* buffer, then write that to userspace. Also set MSR_VSX in
* the saved MSR value to indicate that frame->mc_vregs
* contains valid data
*/
if (current->thread.used_vsr) {
__giveup_vsx(current);
if (copy_vsx_to_user(&frame->mc_vsregs, current))
return 1;
if (msr & MSR_VSX) {
if (copy_transact_vsx_to_user(&tm_frame->mc_vsregs,
current))
return 1;
} else {
if (copy_vsx_to_user(&tm_frame->mc_vsregs, current))
return 1;
}
msr |= MSR_VSX;
}
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
/* SPE regs are not checkpointed with TM, so this section is
* simply the same as in save_user_regs().
*/
if (current->thread.used_spe) {
flush_spe_to_thread(current);
if (__copy_to_user(&frame->mc_vregs, current->thread.evr,
ELF_NEVRREG * sizeof(u32)))
return 1;
/* set MSR_SPE in the saved MSR value to indicate that
* frame->mc_vregs contains valid data */
msr |= MSR_SPE;
}
/* We always copy to/from spefscr */
if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG))
return 1;
#endif /* CONFIG_SPE */
if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
return 1;
if (sigret) {
/* Set up the sigreturn trampoline: li r0,sigret; sc */
if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
|| __put_user(0x44000002UL, &frame->tramp[1]))
return 1;
flush_icache_range((unsigned long) &frame->tramp[0],
(unsigned long) &frame->tramp[2]);
}
return 0;
}
#endif
/*
* Restore the current user register values from the user stack,
* (except for MSR).
*/
static long restore_user_regs(struct pt_regs *regs,
struct mcontext __user *sr, int sig)
{
long err;
unsigned int save_r2 = 0;
unsigned long msr;
powerpc: Introduce VSX thread_struct and CONFIG_VSX The layout of the new VSR registers and how they overlap on top of the legacy FPR and VR registers is: VSR doubleword 0 VSR doubleword 1 ---------------------------------------------------------------- VSR[0] | FPR[0] | | ---------------------------------------------------------------- VSR[1] | FPR[1] | | ---------------------------------------------------------------- | ... | | | ... | | ---------------------------------------------------------------- VSR[30] | FPR[30] | | ---------------------------------------------------------------- VSR[31] | FPR[31] | | ---------------------------------------------------------------- VSR[32] | VR[0] | ---------------------------------------------------------------- VSR[33] | VR[1] | ---------------------------------------------------------------- | ... | | ... | ---------------------------------------------------------------- VSR[62] | VR[30] | ---------------------------------------------------------------- VSR[63] | VR[31] | ---------------------------------------------------------------- VSX has 64 128bit registers. The first 32 regs overlap with the FP registers and hence extend them with and additional 64 bits. The second 32 regs overlap with the VMX registers. This commit introduces the thread_struct changes required to reflect this register layout. Ptrace and signals code is updated so that the floating point registers are correctly accessed from the thread_struct when CONFIG_VSX is enabled. Signed-off-by: Michael Neuling <mikey@neuling.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-06-25 12:07:18 +08:00
#ifdef CONFIG_VSX
int i;
#endif
/*
* restore general registers but not including MSR or SOFTE. Also
* take care of keeping r2 (TLS) intact if not a signal
*/
if (!sig)
save_r2 = (unsigned int)regs->gpr[2];
err = restore_general_regs(regs, sr);
regs->trap = 0;
err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
if (!sig)
regs->gpr[2] = (unsigned long) save_r2;
if (err)
return 1;
/* if doing signal return, restore the previous little-endian mode */
if (sig)
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
/*
* Do this before updating the thread state in
* current->thread.fpr/vr/evr. That way, if we get preempted
* and another task grabs the FPU/Altivec/SPE, it won't be
* tempted to save the current CPU state into the thread_struct
* and corrupt what we are writing there.
*/
discard_lazy_cpu_state();
#ifdef CONFIG_ALTIVEC
powerpc: Introduce VSX thread_struct and CONFIG_VSX The layout of the new VSR registers and how they overlap on top of the legacy FPR and VR registers is: VSR doubleword 0 VSR doubleword 1 ---------------------------------------------------------------- VSR[0] | FPR[0] | | ---------------------------------------------------------------- VSR[1] | FPR[1] | | ---------------------------------------------------------------- | ... | | | ... | | ---------------------------------------------------------------- VSR[30] | FPR[30] | | ---------------------------------------------------------------- VSR[31] | FPR[31] | | ---------------------------------------------------------------- VSR[32] | VR[0] | ---------------------------------------------------------------- VSR[33] | VR[1] | ---------------------------------------------------------------- | ... | | ... | ---------------------------------------------------------------- VSR[62] | VR[30] | ---------------------------------------------------------------- VSR[63] | VR[31] | ---------------------------------------------------------------- VSX has 64 128bit registers. The first 32 regs overlap with the FP registers and hence extend them with and additional 64 bits. The second 32 regs overlap with the VMX registers. This commit introduces the thread_struct changes required to reflect this register layout. Ptrace and signals code is updated so that the floating point registers are correctly accessed from the thread_struct when CONFIG_VSX is enabled. Signed-off-by: Michael Neuling <mikey@neuling.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-06-25 12:07:18 +08:00
/*
* Force the process to reload the altivec registers from
* current->thread when it next does altivec instructions
*/
regs->msr &= ~MSR_VEC;
if (msr & MSR_VEC) {
/* restore altivec registers from the stack */
if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
sizeof(sr->mc_vregs)))
return 1;
} else if (current->thread.used_vr)
memset(&current->thread.vr_state, 0,
ELF_NVRREG * sizeof(vector128));
/* Always get VRSAVE back */
if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
return 1;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
mtspr(SPRN_VRSAVE, current->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
if (copy_fpr_from_user(current, &sr->mc_fregs))
return 1;
powerpc: Introduce VSX thread_struct and CONFIG_VSX The layout of the new VSR registers and how they overlap on top of the legacy FPR and VR registers is: VSR doubleword 0 VSR doubleword 1 ---------------------------------------------------------------- VSR[0] | FPR[0] | | ---------------------------------------------------------------- VSR[1] | FPR[1] | | ---------------------------------------------------------------- | ... | | | ... | | ---------------------------------------------------------------- VSR[30] | FPR[30] | | ---------------------------------------------------------------- VSR[31] | FPR[31] | | ---------------------------------------------------------------- VSR[32] | VR[0] | ---------------------------------------------------------------- VSR[33] | VR[1] | ---------------------------------------------------------------- | ... | | ... | ---------------------------------------------------------------- VSR[62] | VR[30] | ---------------------------------------------------------------- VSR[63] | VR[31] | ---------------------------------------------------------------- VSX has 64 128bit registers. The first 32 regs overlap with the FP registers and hence extend them with and additional 64 bits. The second 32 regs overlap with the VMX registers. This commit introduces the thread_struct changes required to reflect this register layout. Ptrace and signals code is updated so that the floating point registers are correctly accessed from the thread_struct when CONFIG_VSX is enabled. Signed-off-by: Michael Neuling <mikey@neuling.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-06-25 12:07:18 +08:00
#ifdef CONFIG_VSX
/*
* Force the process to reload the VSX registers from
* current->thread when it next does VSX instruction.
*/
regs->msr &= ~MSR_VSX;
if (msr & MSR_VSX) {
/*
* Restore altivec registers from the stack to a local
* buffer, then write this out to the thread_struct
*/
if (copy_vsx_from_user(current, &sr->mc_vsregs))
return 1;
} else if (current->thread.used_vsr)
for (i = 0; i < 32 ; i++)
current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
powerpc: Introduce VSX thread_struct and CONFIG_VSX The layout of the new VSR registers and how they overlap on top of the legacy FPR and VR registers is: VSR doubleword 0 VSR doubleword 1 ---------------------------------------------------------------- VSR[0] | FPR[0] | | ---------------------------------------------------------------- VSR[1] | FPR[1] | | ---------------------------------------------------------------- | ... | | | ... | | ---------------------------------------------------------------- VSR[30] | FPR[30] | | ---------------------------------------------------------------- VSR[31] | FPR[31] | | ---------------------------------------------------------------- VSR[32] | VR[0] | ---------------------------------------------------------------- VSR[33] | VR[1] | ---------------------------------------------------------------- | ... | | ... | ---------------------------------------------------------------- VSR[62] | VR[30] | ---------------------------------------------------------------- VSR[63] | VR[31] | ---------------------------------------------------------------- VSX has 64 128bit registers. The first 32 regs overlap with the FP registers and hence extend them with and additional 64 bits. The second 32 regs overlap with the VMX registers. This commit introduces the thread_struct changes required to reflect this register layout. Ptrace and signals code is updated so that the floating point registers are correctly accessed from the thread_struct when CONFIG_VSX is enabled. Signed-off-by: Michael Neuling <mikey@neuling.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-06-25 12:07:18 +08:00
#endif /* CONFIG_VSX */
/*
* force the process to reload the FP registers from
* current->thread when it next does FP instructions
*/
regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
#ifdef CONFIG_SPE
/* force the process to reload the spe registers from
current->thread when it next does spe instructions */
regs->msr &= ~MSR_SPE;
if (msr & MSR_SPE) {
/* restore spe registers from the stack */
if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
ELF_NEVRREG * sizeof(u32)))
return 1;
} else if (current->thread.used_spe)
memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
/* Always get SPEFSCR back */
if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG))
return 1;
#endif /* CONFIG_SPE */
return 0;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Restore the current user register values from the user stack, except for
* MSR, and recheckpoint the original checkpointed register state for processes
* in transactions.
*/
static long restore_tm_user_regs(struct pt_regs *regs,
struct mcontext __user *sr,
struct mcontext __user *tm_sr)
{
long err;
unsigned long msr, msr_hi;
#ifdef CONFIG_VSX
int i;
#endif
/*
* restore general registers but not including MSR or SOFTE. Also
* take care of keeping r2 (TLS) intact if not a signal.
* See comment in signal_64.c:restore_tm_sigcontexts();
* TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR
* were set by the signal delivery.
*/
err = restore_general_regs(regs, tm_sr);
err |= restore_general_regs(&current->thread.ckpt_regs, sr);
err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]);
err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
if (err)
return 1;
/* Restore the previous little-endian mode */
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
/*
* Do this before updating the thread state in
* current->thread.fpr/vr/evr. That way, if we get preempted
* and another task grabs the FPU/Altivec/SPE, it won't be
* tempted to save the current CPU state into the thread_struct
* and corrupt what we are writing there.
*/
discard_lazy_cpu_state();
#ifdef CONFIG_ALTIVEC
regs->msr &= ~MSR_VEC;
if (msr & MSR_VEC) {
/* restore altivec registers from the stack */
if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
sizeof(sr->mc_vregs)) ||
__copy_from_user(&current->thread.transact_vr,
&tm_sr->mc_vregs,
sizeof(sr->mc_vregs)))
return 1;
} else if (current->thread.used_vr) {
memset(&current->thread.vr_state, 0,
ELF_NVRREG * sizeof(vector128));
memset(&current->thread.transact_vr, 0,
ELF_NVRREG * sizeof(vector128));
}
/* Always get VRSAVE back */
if (__get_user(current->thread.vrsave,
(u32 __user *)&sr->mc_vregs[32]) ||
__get_user(current->thread.transact_vrsave,
(u32 __user *)&tm_sr->mc_vregs[32]))
return 1;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
mtspr(SPRN_VRSAVE, current->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
if (copy_fpr_from_user(current, &sr->mc_fregs) ||
copy_transact_fpr_from_user(current, &tm_sr->mc_fregs))
return 1;
#ifdef CONFIG_VSX
regs->msr &= ~MSR_VSX;
if (msr & MSR_VSX) {
/*
* Restore altivec registers from the stack to a local
* buffer, then write this out to the thread_struct
*/
if (copy_vsx_from_user(current, &sr->mc_vsregs) ||
copy_transact_vsx_from_user(current, &tm_sr->mc_vsregs))
return 1;
} else if (current->thread.used_vsr)
for (i = 0; i < 32 ; i++) {
current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0;
}
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
/* SPE regs are not checkpointed with TM, so this section is
* simply the same as in restore_user_regs().
*/
regs->msr &= ~MSR_SPE;
if (msr & MSR_SPE) {
if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
ELF_NEVRREG * sizeof(u32)))
return 1;
} else if (current->thread.used_spe)
memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
/* Always get SPEFSCR back */
if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs
+ ELF_NEVRREG))
return 1;
#endif /* CONFIG_SPE */
/* Now, recheckpoint. This loads up all of the checkpointed (older)
* registers, including FP and V[S]Rs. After recheckpointing, the
* transactional versions should be loaded.
*/
tm_enable();
powerpc/tm: Disable IRQ in tm_recheckpoint We can't take an IRQ when we're about to do a trechkpt as our GPR state is set to user GPR values. We've hit this when running some IBM Java stress tests in the lab resulting in the following dump: cpu 0x3f: Vector: 700 (Program Check) at [c000000007eb3d40] pc: c000000000050074: restore_gprs+0xc0/0x148 lr: 00000000b52a8184 sp: ac57d360 msr: 8000000100201030 current = 0xc00000002c500000 paca = 0xc000000007dbfc00 softe: 0 irq_happened: 0x00 pid = 34535, comm = Pooled Thread # R00 = 00000000b52a8184 R16 = 00000000b3e48fda R01 = 00000000ac57d360 R17 = 00000000ade79bd8 R02 = 00000000ac586930 R18 = 000000000fac9bcc R03 = 00000000ade60000 R19 = 00000000ac57f930 R04 = 00000000f6624918 R20 = 00000000ade79be8 R05 = 00000000f663f238 R21 = 00000000ac218a54 R06 = 0000000000000002 R22 = 000000000f956280 R07 = 0000000000000008 R23 = 000000000000007e R08 = 000000000000000a R24 = 000000000000000c R09 = 00000000b6e69160 R25 = 00000000b424cf00 R10 = 0000000000000181 R26 = 00000000f66256d4 R11 = 000000000f365ec0 R27 = 00000000b6fdcdd0 R12 = 00000000f66400f0 R28 = 0000000000000001 R13 = 00000000ada71900 R29 = 00000000ade5a300 R14 = 00000000ac2185a8 R30 = 00000000f663f238 R15 = 0000000000000004 R31 = 00000000f6624918 pc = c000000000050074 restore_gprs+0xc0/0x148 cfar= c00000000004fe28 dont_restore_vec+0x1c/0x1a4 lr = 00000000b52a8184 msr = 8000000100201030 cr = 24804888 ctr = 0000000000000000 xer = 0000000000000000 trap = 700 This moves tm_recheckpoint to a C function and moves the tm_restore_sprs into that function. It then adds IRQ disabling over the trechkpt critical section. It also sets the TEXASR FS in the signals code to ensure this is never set now that we explictly write the TM sprs in tm_recheckpoint. Signed-off-by: Michael Neuling <mikey@neuling.org> cc: stable@vger.kernel.org Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2014-04-04 17:19:48 +08:00
/* Make sure the transaction is marked as failed */
current->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&current->thread, msr);
/* Get the top half of the MSR */
if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
return 1;
/* Pull in MSR TM from user context */
regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK);
/* This loads the speculative FP/VEC state, if used */
if (msr & MSR_FP) {
do_load_up_transact_fpu(&current->thread);
regs->msr |= (MSR_FP | current->thread.fpexc_mode);
}
#ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
do_load_up_transact_altivec(&current->thread);
regs->msr |= MSR_VEC;
}
#endif
return 0;
}
#endif
#ifdef CONFIG_PPC64
int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)
{
int err;
if (!access_ok (VERIFY_WRITE, d, sizeof(*d)))
return -EFAULT;
/* If you change siginfo_t structure, please be sure
* this code is fixed accordingly.
* It should never copy any pad contained in the structure
* to avoid security leaks, but must copy the generic
* 3 ints plus the relevant union member.
* This routine must convert siginfo from 64bit to 32bit as well
* at the same time.
*/
err = __put_user(s->si_signo, &d->si_signo);
err |= __put_user(s->si_errno, &d->si_errno);
err |= __put_user((short)s->si_code, &d->si_code);
if (s->si_code < 0)
err |= __copy_to_user(&d->_sifields._pad, &s->_sifields._pad,
SI_PAD_SIZE32);
else switch(s->si_code >> 16) {
case __SI_CHLD >> 16:
err |= __put_user(s->si_pid, &d->si_pid);
err |= __put_user(s->si_uid, &d->si_uid);
err |= __put_user(s->si_utime, &d->si_utime);
err |= __put_user(s->si_stime, &d->si_stime);
err |= __put_user(s->si_status, &d->si_status);
break;
case __SI_FAULT >> 16:
err |= __put_user((unsigned int)(unsigned long)s->si_addr,
&d->si_addr);
break;
case __SI_POLL >> 16:
err |= __put_user(s->si_band, &d->si_band);
err |= __put_user(s->si_fd, &d->si_fd);
break;
case __SI_TIMER >> 16:
err |= __put_user(s->si_tid, &d->si_tid);
err |= __put_user(s->si_overrun, &d->si_overrun);
err |= __put_user(s->si_int, &d->si_int);
break;
case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
case __SI_MESGQ >> 16:
err |= __put_user(s->si_int, &d->si_int);
/* fallthrough */
case __SI_KILL >> 16:
default:
err |= __put_user(s->si_pid, &d->si_pid);
err |= __put_user(s->si_uid, &d->si_uid);
break;
}
return err;
}
#define copy_siginfo_to_user copy_siginfo_to_user32
int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
{
memset(to, 0, sizeof *to);
if (copy_from_user(to, from, 3*sizeof(int)) ||
copy_from_user(to->_sifields._pad,
from->_sifields._pad, SI_PAD_SIZE32))
return -EFAULT;
return 0;
}
#endif /* CONFIG_PPC64 */
/*
* Set up a signal frame for a "real-time" signal handler
* (one which gets siginfo).
*/
int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
struct pt_regs *regs)
{
struct rt_sigframe __user *rt_sf;
struct mcontext __user *frame;
struct mcontext __user *tm_frame = NULL;
void __user *addr;
unsigned long newsp = 0;
int sigret;
unsigned long tramp;
/* Set up Signal Frame */
/* Put a Real Time Context onto stack */
rt_sf = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*rt_sf), 1);
addr = rt_sf;
if (unlikely(rt_sf == NULL))
goto badframe;
/* Put the siginfo & fill in most of the ucontext */
if (copy_siginfo_to_user(&rt_sf->info, &ksig->info)
|| __put_user(0, &rt_sf->uc.uc_flags)
|| __save_altstack(&rt_sf->uc.uc_stack, regs->gpr[1])
|| __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext),
&rt_sf->uc.uc_regs)
|| put_sigset_t(&rt_sf->uc.uc_sigmask, oldset))
goto badframe;
/* Save user registers on the stack */
frame = &rt_sf->uc.uc_mcontext;
addr = frame;
if (vdso32_rt_sigtramp && current->mm->context.vdso_base) {
sigret = 0;
tramp = current->mm->context.vdso_base + vdso32_rt_sigtramp;
} else {
sigret = __NR_rt_sigreturn;
tramp = (unsigned long) frame->tramp;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_frame = &rt_sf->uc_transact.uc_mcontext;
if (MSR_TM_ACTIVE(regs->msr)) {
if (__put_user((unsigned long)&rt_sf->uc_transact,
&rt_sf->uc.uc_link) ||
__put_user((unsigned long)tm_frame,
&rt_sf->uc_transact.uc_regs))
goto badframe;
if (save_tm_user_regs(regs, frame, tm_frame, sigret))
goto badframe;
}
else
#endif
{
if (__put_user(0, &rt_sf->uc.uc_link))
goto badframe;
if (save_user_regs(regs, frame, tm_frame, sigret, 1))
goto badframe;
}
regs->link = tramp;
current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
/* create a stack frame for the caller of the handler */
newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16);
addr = (void __user *)regs->gpr[1];
if (put_user(regs->gpr[1], (u32 __user *)newsp))
goto badframe;
/* Fill registers for signal handler */
regs->gpr[1] = newsp;
regs->gpr[3] = ksig->sig;
regs->gpr[4] = (unsigned long) &rt_sf->info;
regs->gpr[5] = (unsigned long) &rt_sf->uc;
regs->gpr[6] = (unsigned long) rt_sf;
regs->nip = (unsigned long) ksig->ka.sa.sa_handler;
/* enter the signal handler in native-endian mode */
regs->msr &= ~MSR_LE;
regs->msr |= (MSR_KERNEL & MSR_LE);
return 0;
badframe:
if (show_unhandled_signals)
printk_ratelimited(KERN_INFO
"%s[%d]: bad frame in handle_rt_signal32: "
"%p nip %08lx lr %08lx\n",
current->comm, current->pid,
addr, regs->nip, regs->link);
return 1;
}
static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sig)
{
sigset_t set;
struct mcontext __user *mcp;
if (get_sigset_t(&set, &ucp->uc_sigmask))
return -EFAULT;
#ifdef CONFIG_PPC64
{
u32 cmcp;
if (__get_user(cmcp, &ucp->uc_regs))
return -EFAULT;
mcp = (struct mcontext __user *)(u64)cmcp;
/* no need to check access_ok(mcp), since mcp < 4GB */
}
#else
if (__get_user(mcp, &ucp->uc_regs))
return -EFAULT;
if (!access_ok(VERIFY_READ, mcp, sizeof(*mcp)))
return -EFAULT;
#endif
set_current_blocked(&set);
if (restore_user_regs(regs, mcp, sig))
return -EFAULT;
return 0;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
static int do_setcontext_tm(struct ucontext __user *ucp,
struct ucontext __user *tm_ucp,
struct pt_regs *regs)
{
sigset_t set;
struct mcontext __user *mcp;
struct mcontext __user *tm_mcp;
u32 cmcp;
u32 tm_cmcp;
if (get_sigset_t(&set, &ucp->uc_sigmask))
return -EFAULT;
if (__get_user(cmcp, &ucp->uc_regs) ||
__get_user(tm_cmcp, &tm_ucp->uc_regs))
return -EFAULT;
mcp = (struct mcontext __user *)(u64)cmcp;
tm_mcp = (struct mcontext __user *)(u64)tm_cmcp;
/* no need to check access_ok(mcp), since mcp < 4GB */
set_current_blocked(&set);
if (restore_tm_user_regs(regs, mcp, tm_mcp))
return -EFAULT;
return 0;
}
#endif
long sys_swapcontext(struct ucontext __user *old_ctx,
powerpc: Fix various syscall/signal/swapcontext bugs A careful reading of the recent changes to the system call entry/exit paths revealed several problems, plus some things that could be simplified and improved: * 32-bit wasn't testing the _TIF_NOERROR bit in the syscall fast exit path, so it was only doing anything with it once it saw some other bit being set. In other words, the noerror behaviour would apply to the next system call where we had to reschedule or deliver a signal, which is not necessarily the current system call. * 32-bit wasn't doing the call to ptrace_notify in the syscall exit path when the _TIF_SINGLESTEP bit was set. * _TIF_RESTOREALL was in both _TIF_USER_WORK_MASK and _TIF_PERSYSCALL_MASK, which is odd since _TIF_RESTOREALL is only set by system calls. I took it out of _TIF_USER_WORK_MASK. * On 64-bit, _TIF_RESTOREALL wasn't causing the non-volatile registers to be restored (unless perhaps a signal was delivered or the syscall was traced or single-stepped). Thus the non-volatile registers weren't restored on exit from a signal handler. We probably got away with it mostly because signal handlers written in C wouldn't alter the non-volatile registers. * On 32-bit I simplified the code and made it more like 64-bit by making the syscall exit path jump to ret_from_except to handle preemption and signal delivery. * 32-bit was calling do_signal unnecessarily when _TIF_RESTOREALL was set - but I think because of that 32-bit was actually restoring the non-volatile registers on exit from a signal handler. * I changed the order of enabling interrupts and saving the non-volatile registers before calling do_syscall_trace_leave; now we enable interrupts first. Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-03-08 10:24:22 +08:00
struct ucontext __user *new_ctx,
int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
{
unsigned char tmp;
int ctx_has_vsx_region = 0;
#ifdef CONFIG_PPC64
unsigned long new_msr = 0;
if (new_ctx) {
struct mcontext __user *mcp;
u32 cmcp;
/*
* Get pointer to the real mcontext. No need for
* access_ok since we are dealing with compat
* pointers.
*/
if (__get_user(cmcp, &new_ctx->uc_regs))
return -EFAULT;
mcp = (struct mcontext __user *)(u64)cmcp;
if (__get_user(new_msr, &mcp->mc_gregs[PT_MSR]))
return -EFAULT;
}
/*
* Check that the context is not smaller than the original
* size (with VMX but without VSX)
*/
if (ctx_size < UCONTEXTSIZEWITHOUTVSX)
return -EINVAL;
/*
* If the new context state sets the MSR VSX bits but
* it doesn't provide VSX state.
*/
if ((ctx_size < sizeof(struct ucontext)) &&
(new_msr & MSR_VSX))
return -EINVAL;
/* Does the context have enough room to store VSX data? */
if (ctx_size >= sizeof(struct ucontext))
ctx_has_vsx_region = 1;
#else
/* Context size is for future use. Right now, we only make sure
* we are passed something we understand
*/
if (ctx_size < sizeof(struct ucontext))
return -EINVAL;
#endif
if (old_ctx != NULL) {
struct mcontext __user *mctx;
/*
* old_ctx might not be 16-byte aligned, in which
* case old_ctx->uc_mcontext won't be either.
* Because we have the old_ctx->uc_pad2 field
* before old_ctx->uc_mcontext, we need to round down
* from &old_ctx->uc_mcontext to a 16-byte boundary.
*/
mctx = (struct mcontext __user *)
((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
|| save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region)
|| put_sigset_t(&old_ctx->uc_sigmask, &current->blocked)
|| __put_user(to_user_ptr(mctx), &old_ctx->uc_regs))
return -EFAULT;
}
if (new_ctx == NULL)
return 0;
if (!access_ok(VERIFY_READ, new_ctx, ctx_size)
|| __get_user(tmp, (u8 __user *) new_ctx)
|| __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
return -EFAULT;
/*
* If we get a fault copying the context into the kernel's
* image of the user's registers, we can't just return -EFAULT
* because the user's registers will be corrupted. For instance
* the NIP value may have been updated but not some of the
* other registers. Given that we have done the access_ok
* and successfully read the first and last bytes of the region
* above, this should only happen in an out-of-memory situation
* or if another thread unmaps the region containing the context.
* We kill the task with a SIGSEGV in this situation.
*/
if (do_setcontext(new_ctx, regs, 0))
do_exit(SIGSEGV);
[PATCH] syscall entry/exit revamp This cleanup patch speeds up the null syscall path on ppc64 by about 3%, and brings the ppc32 and ppc64 code slightly closer together. The ppc64 code was checking current_thread_info()->flags twice in the syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after disabling interrupts. Now we do the same as ppc32 -- check the flags only once in the fast path, and re-enable interrupts if necessary in the ptrace case. The patch abolishes the 'syscall_noerror' member of struct thread_info and replaces it with a TIF_NOERROR bit in the flags, which is handled in the slow path. This shortens the syscall entry code, which no longer needs to clear syscall_noerror. The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow path to save the non-volatile GPRs into a signal frame. This removes the need for the assembly wrappers around sys_sigsuspend(), sys_rt_sigsuspend(), et al which existed solely to save those registers in advance. It also means I don't have to add new wrappers for ppoll() and pselect(), which is what I was supposed to be doing when I got distracted into this... Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit directly into a signal handler (as required by sigsuspend et al) by introducing a TIF_RESTOREALL flag which causes _all_ the registers to be reloaded from the pt_regs by taking the ret_from_exception path, instead of the normal syscall exit path which stomps on the callee-saved GPRs. It appears to pass an LTP test run on ppc64, and passes basic testing on ppc32 too. Brief tests of ptrace functionality with strace and gdb also appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :) Signed-off-by: David Woodhouse <dwmw2@infradead.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
set_thread_flag(TIF_RESTOREALL);
return 0;
}
long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
struct pt_regs *regs)
{
struct rt_sigframe __user *rt_sf;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
struct ucontext __user *uc_transact;
unsigned long msr_hi;
unsigned long tmp;
int tm_restore = 0;
#endif
/* Always make any pending restarted system calls return -EINTR */
all arches, signal: move restart_block to struct task_struct If an attacker can cause a controlled kernel stack overflow, overwriting the restart block is a very juicy exploit target. This is because the restart_block is held in the same memory allocation as the kernel stack. Moving the restart block to struct task_struct prevents this exploit by making the restart_block harder to locate. Note that there are other fields in thread_info that are also easy targets, at least on some architectures. It's also a decent simplification, since the restart code is more or less identical on all architectures. [james.hogan@imgtec.com: metag: align thread_info::supervisor_stack] Signed-off-by: Andy Lutomirski <luto@amacapital.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Kees Cook <keescook@chromium.org> Cc: David Miller <davem@davemloft.net> Acked-by: Richard Weinberger <richard@nod.at> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Matt Turner <mattst88@gmail.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Haavard Skinnemoen <hskinnemoen@gmail.com> Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no> Cc: Steven Miao <realmz6@gmail.com> Cc: Mark Salter <msalter@redhat.com> Cc: Aurelien Jacquiot <a-jacquiot@ti.com> Cc: Mikael Starvik <starvik@axis.com> Cc: Jesper Nilsson <jesper.nilsson@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Michal Simek <monstr@monstr.eu> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Jonas Bonn <jonas@southpole.se> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Helge Deller <deller@gmx.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Tested-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Chen Liqin <liqin.linux@gmail.com> Cc: Lennox Wu <lennox.wu@gmail.com> Cc: Chris Metcalf <cmetcalf@ezchip.com> Cc: Guan Xuetao <gxt@mprc.pku.edu.cn> Cc: Chris Zankel <chris@zankel.net> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Guenter Roeck <linux@roeck-us.net> Signed-off-by: James Hogan <james.hogan@imgtec.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-02-13 07:01:14 +08:00
current->restart_block.fn = do_no_restart_syscall;
rt_sf = (struct rt_sigframe __user *)
(regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf)))
goto bad;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (__get_user(tmp, &rt_sf->uc.uc_link))
goto bad;
uc_transact = (struct ucontext __user *)(uintptr_t)tmp;
if (uc_transact) {
u32 cmcp;
struct mcontext __user *mcp;
if (__get_user(cmcp, &uc_transact->uc_regs))
return -EFAULT;
mcp = (struct mcontext __user *)(u64)cmcp;
/* The top 32 bits of the MSR are stashed in the transactional
* ucontext. */
if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR]))
goto bad;
if (MSR_TM_ACTIVE(msr_hi<<32)) {
/* We only recheckpoint on return if we're
* transaction.
*/
tm_restore = 1;
if (do_setcontext_tm(&rt_sf->uc, uc_transact, regs))
goto bad;
}
}
if (!tm_restore)
/* Fall through, for non-TM restore */
#endif
if (do_setcontext(&rt_sf->uc, regs, 1))
goto bad;
/*
* It's not clear whether or why it is desirable to save the
* sigaltstack setting on signal delivery and restore it on
* signal return. But other architectures do this and we have
* always done it up until now so it is probably better not to
* change it. -- paulus
*/
#ifdef CONFIG_PPC64
if (compat_restore_altstack(&rt_sf->uc.uc_stack))
goto bad;
#else
if (restore_altstack(&rt_sf->uc.uc_stack))
goto bad;
#endif
[PATCH] syscall entry/exit revamp This cleanup patch speeds up the null syscall path on ppc64 by about 3%, and brings the ppc32 and ppc64 code slightly closer together. The ppc64 code was checking current_thread_info()->flags twice in the syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after disabling interrupts. Now we do the same as ppc32 -- check the flags only once in the fast path, and re-enable interrupts if necessary in the ptrace case. The patch abolishes the 'syscall_noerror' member of struct thread_info and replaces it with a TIF_NOERROR bit in the flags, which is handled in the slow path. This shortens the syscall entry code, which no longer needs to clear syscall_noerror. The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow path to save the non-volatile GPRs into a signal frame. This removes the need for the assembly wrappers around sys_sigsuspend(), sys_rt_sigsuspend(), et al which existed solely to save those registers in advance. It also means I don't have to add new wrappers for ppoll() and pselect(), which is what I was supposed to be doing when I got distracted into this... Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit directly into a signal handler (as required by sigsuspend et al) by introducing a TIF_RESTOREALL flag which causes _all_ the registers to be reloaded from the pt_regs by taking the ret_from_exception path, instead of the normal syscall exit path which stomps on the callee-saved GPRs. It appears to pass an LTP test run on ppc64, and passes basic testing on ppc32 too. Brief tests of ptrace functionality with strace and gdb also appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :) Signed-off-by: David Woodhouse <dwmw2@infradead.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
set_thread_flag(TIF_RESTOREALL);
return 0;
bad:
if (show_unhandled_signals)
printk_ratelimited(KERN_INFO
"%s[%d]: bad frame in sys_rt_sigreturn: "
"%p nip %08lx lr %08lx\n",
current->comm, current->pid,
rt_sf, regs->nip, regs->link);
force_sig(SIGSEGV, current);
return 0;
}
#ifdef CONFIG_PPC32
int sys_debug_setcontext(struct ucontext __user *ctx,
int ndbg, struct sig_dbg_op __user *dbg,
int r6, int r7, int r8,
struct pt_regs *regs)
{
struct sig_dbg_op op;
int i;
unsigned char tmp;
unsigned long new_msr = regs->msr;
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
unsigned long new_dbcr0 = current->thread.debug.dbcr0;
#endif
for (i=0; i<ndbg; i++) {
if (copy_from_user(&op, dbg + i, sizeof(op)))
return -EFAULT;
switch (op.dbg_type) {
case SIG_DBG_SINGLE_STEPPING:
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
if (op.dbg_value) {
new_msr |= MSR_DE;
new_dbcr0 |= (DBCR0_IDM | DBCR0_IC);
} else {
new_dbcr0 &= ~DBCR0_IC;
if (!DBCR_ACTIVE_EVENTS(new_dbcr0,
current->thread.debug.dbcr1)) {
new_msr &= ~MSR_DE;
new_dbcr0 &= ~DBCR0_IDM;
}
}
#else
if (op.dbg_value)
new_msr |= MSR_SE;
else
new_msr &= ~MSR_SE;
#endif
break;
case SIG_DBG_BRANCH_TRACING:
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
return -EINVAL;
#else
if (op.dbg_value)
new_msr |= MSR_BE;
else
new_msr &= ~MSR_BE;
#endif
break;
default:
return -EINVAL;
}
}
/* We wait until here to actually install the values in the
registers so if we fail in the above loop, it will not
affect the contents of these registers. After this point,
failure is a problem, anyway, and it's very unlikely unless
the user is really doing something wrong. */
regs->msr = new_msr;
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
current->thread.debug.dbcr0 = new_dbcr0;
#endif
if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx))
|| __get_user(tmp, (u8 __user *) ctx)
|| __get_user(tmp, (u8 __user *) (ctx + 1) - 1))
return -EFAULT;
/*
* If we get a fault copying the context into the kernel's
* image of the user's registers, we can't just return -EFAULT
* because the user's registers will be corrupted. For instance
* the NIP value may have been updated but not some of the
* other registers. Given that we have done the access_ok
* and successfully read the first and last bytes of the region
* above, this should only happen in an out-of-memory situation
* or if another thread unmaps the region containing the context.
* We kill the task with a SIGSEGV in this situation.
*/
if (do_setcontext(ctx, regs, 1)) {
if (show_unhandled_signals)
printk_ratelimited(KERN_INFO "%s[%d]: bad frame in "
"sys_debug_setcontext: %p nip %08lx "
"lr %08lx\n",
current->comm, current->pid,
ctx, regs->nip, regs->link);
force_sig(SIGSEGV, current);
goto out;
}
/*
* It's not clear whether or why it is desirable to save the
* sigaltstack setting on signal delivery and restore it on
* signal return. But other architectures do this and we have
* always done it up until now so it is probably better not to
* change it. -- paulus
*/
restore_altstack(&ctx->uc_stack);
[PATCH] syscall entry/exit revamp This cleanup patch speeds up the null syscall path on ppc64 by about 3%, and brings the ppc32 and ppc64 code slightly closer together. The ppc64 code was checking current_thread_info()->flags twice in the syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after disabling interrupts. Now we do the same as ppc32 -- check the flags only once in the fast path, and re-enable interrupts if necessary in the ptrace case. The patch abolishes the 'syscall_noerror' member of struct thread_info and replaces it with a TIF_NOERROR bit in the flags, which is handled in the slow path. This shortens the syscall entry code, which no longer needs to clear syscall_noerror. The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow path to save the non-volatile GPRs into a signal frame. This removes the need for the assembly wrappers around sys_sigsuspend(), sys_rt_sigsuspend(), et al which existed solely to save those registers in advance. It also means I don't have to add new wrappers for ppoll() and pselect(), which is what I was supposed to be doing when I got distracted into this... Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit directly into a signal handler (as required by sigsuspend et al) by introducing a TIF_RESTOREALL flag which causes _all_ the registers to be reloaded from the pt_regs by taking the ret_from_exception path, instead of the normal syscall exit path which stomps on the callee-saved GPRs. It appears to pass an LTP test run on ppc64, and passes basic testing on ppc32 too. Brief tests of ptrace functionality with strace and gdb also appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :) Signed-off-by: David Woodhouse <dwmw2@infradead.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
set_thread_flag(TIF_RESTOREALL);
out:
return 0;
}
#endif
/*
* OK, we're invoking a handler
*/
int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs)
{
struct sigcontext __user *sc;
struct sigframe __user *frame;
struct mcontext __user *tm_mctx = NULL;
unsigned long newsp = 0;
int sigret;
unsigned long tramp;
/* Set up Signal Frame */
frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 1);
if (unlikely(frame == NULL))
goto badframe;
sc = (struct sigcontext __user *) &frame->sctx;
#if _NSIG != 64
#error "Please adjust handle_signal()"
#endif
if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler)
|| __put_user(oldset->sig[0], &sc->oldmask)
#ifdef CONFIG_PPC64
|| __put_user((oldset->sig[0] >> 32), &sc->_unused[3])
#else
|| __put_user(oldset->sig[1], &sc->_unused[3])
#endif
|| __put_user(to_user_ptr(&frame->mctx), &sc->regs)
|| __put_user(ksig->sig, &sc->signal))
goto badframe;
if (vdso32_sigtramp && current->mm->context.vdso_base) {
sigret = 0;
tramp = current->mm->context.vdso_base + vdso32_sigtramp;
} else {
sigret = __NR_sigreturn;
tramp = (unsigned long) frame->mctx.tramp;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_mctx = &frame->mctx_transact;
if (MSR_TM_ACTIVE(regs->msr)) {
if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact,
sigret))
goto badframe;
}
else
#endif
{
if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1))
goto badframe;
}
regs->link = tramp;
current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
/* create a stack frame for the caller of the handler */
newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
if (put_user(regs->gpr[1], (u32 __user *)newsp))
goto badframe;
regs->gpr[1] = newsp;
regs->gpr[3] = ksig->sig;
regs->gpr[4] = (unsigned long) sc;
regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler;
/* enter the signal handler in big-endian mode */
regs->msr &= ~MSR_LE;
return 0;
badframe:
if (show_unhandled_signals)
printk_ratelimited(KERN_INFO
"%s[%d]: bad frame in handle_signal32: "
"%p nip %08lx lr %08lx\n",
current->comm, current->pid,
frame, regs->nip, regs->link);
return 1;
}
/*
* Do a signal return; undo the signal stack.
*/
long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
struct pt_regs *regs)
{
struct sigframe __user *sf;
struct sigcontext __user *sc;
struct sigcontext sigctx;
struct mcontext __user *sr;
void __user *addr;
sigset_t set;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
struct mcontext __user *mcp, *tm_mcp;
unsigned long msr_hi;
#endif
/* Always make any pending restarted system calls return -EINTR */
all arches, signal: move restart_block to struct task_struct If an attacker can cause a controlled kernel stack overflow, overwriting the restart block is a very juicy exploit target. This is because the restart_block is held in the same memory allocation as the kernel stack. Moving the restart block to struct task_struct prevents this exploit by making the restart_block harder to locate. Note that there are other fields in thread_info that are also easy targets, at least on some architectures. It's also a decent simplification, since the restart code is more or less identical on all architectures. [james.hogan@imgtec.com: metag: align thread_info::supervisor_stack] Signed-off-by: Andy Lutomirski <luto@amacapital.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Kees Cook <keescook@chromium.org> Cc: David Miller <davem@davemloft.net> Acked-by: Richard Weinberger <richard@nod.at> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Matt Turner <mattst88@gmail.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Haavard Skinnemoen <hskinnemoen@gmail.com> Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no> Cc: Steven Miao <realmz6@gmail.com> Cc: Mark Salter <msalter@redhat.com> Cc: Aurelien Jacquiot <a-jacquiot@ti.com> Cc: Mikael Starvik <starvik@axis.com> Cc: Jesper Nilsson <jesper.nilsson@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Michal Simek <monstr@monstr.eu> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Jonas Bonn <jonas@southpole.se> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Helge Deller <deller@gmx.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Tested-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Chen Liqin <liqin.linux@gmail.com> Cc: Lennox Wu <lennox.wu@gmail.com> Cc: Chris Metcalf <cmetcalf@ezchip.com> Cc: Guan Xuetao <gxt@mprc.pku.edu.cn> Cc: Chris Zankel <chris@zankel.net> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Guenter Roeck <linux@roeck-us.net> Signed-off-by: James Hogan <james.hogan@imgtec.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-02-13 07:01:14 +08:00
current->restart_block.fn = do_no_restart_syscall;
sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
sc = &sf->sctx;
addr = sc;
if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
goto badframe;
#ifdef CONFIG_PPC64
/*
* Note that PPC32 puts the upper 32 bits of the sigmask in the
* unused part of the signal stackframe
*/
set.sig[0] = sigctx.oldmask + ((long)(sigctx._unused[3]) << 32);
#else
set.sig[0] = sigctx.oldmask;
set.sig[1] = sigctx._unused[3];
#endif
set_current_blocked(&set);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
mcp = (struct mcontext __user *)&sf->mctx;
tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
goto badframe;
if (MSR_TM_ACTIVE(msr_hi<<32)) {
if (!cpu_has_feature(CPU_FTR_TM))
goto badframe;
if (restore_tm_user_regs(regs, mcp, tm_mcp))
goto badframe;
} else
#endif
{
sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
addr = sr;
if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
|| restore_user_regs(regs, sr, 1))
goto badframe;
}
[PATCH] syscall entry/exit revamp This cleanup patch speeds up the null syscall path on ppc64 by about 3%, and brings the ppc32 and ppc64 code slightly closer together. The ppc64 code was checking current_thread_info()->flags twice in the syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after disabling interrupts. Now we do the same as ppc32 -- check the flags only once in the fast path, and re-enable interrupts if necessary in the ptrace case. The patch abolishes the 'syscall_noerror' member of struct thread_info and replaces it with a TIF_NOERROR bit in the flags, which is handled in the slow path. This shortens the syscall entry code, which no longer needs to clear syscall_noerror. The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow path to save the non-volatile GPRs into a signal frame. This removes the need for the assembly wrappers around sys_sigsuspend(), sys_rt_sigsuspend(), et al which existed solely to save those registers in advance. It also means I don't have to add new wrappers for ppoll() and pselect(), which is what I was supposed to be doing when I got distracted into this... Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit directly into a signal handler (as required by sigsuspend et al) by introducing a TIF_RESTOREALL flag which causes _all_ the registers to be reloaded from the pt_regs by taking the ret_from_exception path, instead of the normal syscall exit path which stomps on the callee-saved GPRs. It appears to pass an LTP test run on ppc64, and passes basic testing on ppc32 too. Brief tests of ptrace functionality with strace and gdb also appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :) Signed-off-by: David Woodhouse <dwmw2@infradead.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2005-11-16 02:52:18 +08:00
set_thread_flag(TIF_RESTOREALL);
return 0;
badframe:
if (show_unhandled_signals)
printk_ratelimited(KERN_INFO
"%s[%d]: bad frame in sys_sigreturn: "
"%p nip %08lx lr %08lx\n",
current->comm, current->pid,
addr, regs->nip, regs->link);
force_sig(SIGSEGV, current);
return 0;
}