powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI
The gcc switch -mprofile-kernel defines a new ABI for calling _mcount() very early in the function, with minimal overhead.

Although -mprofile-kernel has been available since GCC 3.4, there were bugs which were only fixed recently. Currently it is known to work in GCC 4.9, 5 and 6.

Additionally there are two possible code sequences generated by the flag: the first uses mflr/std/bl and the second is optimised to omit the std (see the sketch below). Currently only GCC 6 has the optimised sequence. This patch supports both sequences.

Initial work started by Vojtech Pavlik, used with permission.

Key changes:
- rework _mcount() to work for both the old and new ABIs.
- implement new versions of ftrace_caller() and ftrace_graph_caller() which deal with the new ABI.
- updates to __ftrace_make_nop() to recognise the new mcount calling sequence.
- updates to __ftrace_make_call() to recognise the nop'ed sequence.
- implement ftrace_modify_call().
- updates to the module loader to suppress the TOC save in the module stub when calling mcount with the new ABI.

Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Torsten Duwe <duwe@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
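For reference, the two -mprofile-kernel call-site sequences look like this — a sketch reconstructed from the checks in __ftrace_make_nop() below, with the LR save offset assumed to be PPC_LR_STKOFF (16 on ppc64):

	# mflr/std/bl sequence (GCC 4.9 and 5)
	mflr	r0
	std	r0, 16(r1)	# matched as PPC_INST_STD_LR
	bl	_mcount

	# optimised sequence (GCC 6), std omitted
	mflr	r0
	bl	_mcount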
arch/powerpc/include/asm/code-patching.h
@@ -99,4 +99,25 @@ static inline unsigned long ppc_global_function_entry(void *func)
 #endif
 }
 
+#ifdef CONFIG_PPC64
+/*
+ * Some instruction encodings commonly used in dynamic ftracing
+ * and function live patching.
+ */
+
+/* This must match the definition of STK_GOT in <asm/ppc_asm.h> */
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define R2_STACK_OFFSET		24
+#else
+#define R2_STACK_OFFSET		40
+#endif
+
+#define PPC_INST_LD_TOC		(PPC_INST_LD  | ___PPC_RT(__REG_R2) | \
+				 ___PPC_RA(__REG_R1) | R2_STACK_OFFSET)
+
+/* usually preceded by a mflr r0 */
+#define PPC_INST_STD_LR		(PPC_INST_STD | ___PPC_RS(__REG_R0) | \
+				 ___PPC_RA(__REG_R1) | PPC_LR_STKOFF)
+#endif /* CONFIG_PPC64 */
+
 #endif /* _ASM_POWERPC_CODE_PATCHING_H */
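Decoded, the two encodings above correspond to the following instructions (ELFv2 offsets shown; this assumes PPC_LR_STKOFF is 16, as on ppc64):

	ld	r2, 24(r1)	# PPC_INST_LD_TOC (offset 40 on ELFv1)
	std	r0, 16(r1)	# PPC_INST_STD_LR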
arch/powerpc/include/asm/ftrace.h
@@ -46,6 +46,8 @@
 extern void _mcount(void);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
+# define FTRACE_ADDR ((unsigned long)ftrace_caller)
+# define FTRACE_REGS_ADDR FTRACE_ADDR
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/* reloction of mcount call site is the same as the address */
@@ -58,6 +60,9 @@ struct dyn_arch_ftrace {
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* __ASSEMBLY__ */
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#endif
 #endif
 
 #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__)
arch/powerpc/kernel/entry_64.S
@@ -1143,8 +1143,12 @@ _GLOBAL(enter_prom)
 #ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL(mcount)
 _GLOBAL(_mcount)
-	blr
+	mflr	r12
+	mtctr	r12
+	mtlr	r0
+	bctr
 
+#ifndef CC_USING_MPROFILE_KERNEL
 _GLOBAL_TOC(ftrace_caller)
 	/* Taken from output of objdump from lib64/glibc */
 	mflr	r3
@@ -1166,6 +1170,115 @@ _GLOBAL(ftrace_graph_stub)
 	ld	r0, 128(r1)
 	mtlr	r0
 	addi	r1, r1, 112
+
+#else /* CC_USING_MPROFILE_KERNEL */
+/*
+ *
+ * ftrace_caller() is the function that replaces _mcount() when ftrace is
+ * active.
+ *
+ * We arrive here after a function A calls function B, and we are the trace
+ * function for B. When we enter r1 points to A's stack frame, B has not yet
+ * had a chance to allocate one.
+ *
+ * Additionally r2 may point either to the TOC for A, or B, depending on
+ * whether B did a TOC setup sequence before calling us.
+ *
+ * On entry the LR points back to the _mcount() call site, and r0 holds the
+ * saved LR as it was on entry to B, ie. the original return address at the
+ * call site in A.
+ *
+ * Our job is to save the register state into a struct pt_regs (on the stack)
+ * and then arrange for the ftrace function to be called.
+ */
+_GLOBAL(ftrace_caller)
+	/* Save the original return address in A's stack frame */
+	std	r0, LRSAVE(r1)
+
+	/* Create our stack frame + pt_regs */
+	stdu	r1, -SWITCH_FRAME_SIZE(r1)
+
+	/* Save all gprs to pt_regs */
+	SAVE_8GPRS(0, r1)
+	SAVE_8GPRS(8, r1)
+	SAVE_8GPRS(16, r1)
+	SAVE_8GPRS(24, r1)
+
+	/* Load special regs for save below */
+	mfmsr	r8
+	mfctr	r9
+	mfxer	r10
+	mfcr	r11
+
+	/* Get the _mcount() call site out of LR */
+	mflr	r7
+	/* Save it as pt_regs->nip & pt_regs->link */
+	std	r7, _NIP(r1)
+	std	r7, _LINK(r1)
+
+	/* Save callee's TOC in the ABI compliant location */
+	std	r2, 24(r1)
+	ld	r2, PACATOC(r13)	/* get kernel TOC in r2 */
+
+	addis	r3, r2, function_trace_op@toc@ha
+	addi	r3, r3, function_trace_op@toc@l
+	ld	r5, 0(r3)
+
+	/* Calculate ip from nip-4 into r3 for call below */
+	subi	r3, r7, MCOUNT_INSN_SIZE
+
+	/* Put the original return address in r4 as parent_ip */
+	mr	r4, r0
+
+	/* Save special regs */
+	std	r8, _MSR(r1)
+	std	r9, _CTR(r1)
+	std	r10, _XER(r1)
+	std	r11, _CCR(r1)
+
+	/* Load &pt_regs in r6 for call below */
+	addi	r6, r1, STACK_FRAME_OVERHEAD
+
+	/* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_call
+ftrace_call:
+	bl	ftrace_stub
+	nop
+
+	/* Load ctr with the possibly modified NIP */
+	ld	r3, _NIP(r1)
+	mtctr	r3
+
+	/* Restore gprs */
+	REST_8GPRS(0, r1)
+	REST_8GPRS(8, r1)
+	REST_8GPRS(16, r1)
+	REST_8GPRS(24, r1)
+
+	/* Restore callee's TOC */
+	ld	r2, 24(r1)
+
+	/* Pop our stack frame */
+	addi	r1, r1, SWITCH_FRAME_SIZE
+
+	/* Restore original LR for return to B */
+	ld	r0, LRSAVE(r1)
+	mtlr	r0
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	stdu	r1, -112(r1)
+.globl ftrace_graph_call
+ftrace_graph_call:
+	b	ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+	addi	r1, r1, 112
+#endif
+
+	ld	r0, LRSAVE(r1)	/* restore callee's lr at _mcount site */
+	mtlr	r0
+	bctr			/* jump after _mcount site */
+#endif /* CC_USING_MPROFILE_KERNEL */
+
 _GLOBAL(ftrace_stub)
 	blr
 #else
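The "possibly modified NIP" above is what makes this usable for live patching: a handler registered with FTRACE_OPS_FL_SAVE_REGS can rewrite pt_regs->nip, and ftrace_caller will bctr to the new address instead of returning into B. A minimal sketch against the generic ftrace_ops API of this era; my_handler and my_replacement are hypothetical names:

	#include <linux/ftrace.h>

	/* Hypothetical function we divert calls to */
	static void my_replacement(void) { }

	static void my_handler(unsigned long ip, unsigned long parent_ip,
			       struct ftrace_ops *op, struct pt_regs *regs)
	{
		/* ftrace_caller reloads _NIP into ctr and branches there */
		regs->nip = (unsigned long)my_replacement;
	}

	static struct ftrace_ops my_ops = {
		.func	= my_handler,
		.flags	= FTRACE_OPS_FL_SAVE_REGS,
	};

Registering my_ops with ftrace_set_filter_ip() on the traced function and register_ftrace_function() would then divert its callers into my_replacement.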
@@ -1198,6 +1311,7 @@ _GLOBAL(ftrace_stub)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#ifndef CC_USING_MPROFILE_KERNEL
 _GLOBAL(ftrace_graph_caller)
 	/* load r4 with local address */
 	ld	r4, 128(r1)
@@ -1222,6 +1336,56 @@ _GLOBAL(ftrace_graph_caller)
 	addi	r1, r1, 112
 	blr
 
+#else /* CC_USING_MPROFILE_KERNEL */
+_GLOBAL(ftrace_graph_caller)
+	/* with -mprofile-kernel, parameter regs are still alive at _mcount */
+	std	r10, 104(r1)
+	std	r9, 96(r1)
+	std	r8, 88(r1)
+	std	r7, 80(r1)
+	std	r6, 72(r1)
+	std	r5, 64(r1)
+	std	r4, 56(r1)
+	std	r3, 48(r1)
+
+	/* Save callee's TOC in the ABI compliant location */
+	std	r2, 24(r1)
+	ld	r2, PACATOC(r13)	/* get kernel TOC in r2 */
+
+	mfctr	r4		/* ftrace_caller has moved local addr here */
+	std	r4, 40(r1)
+	mflr	r3		/* ftrace_caller has restored LR from stack */
+	subi	r4, r4, MCOUNT_INSN_SIZE
+
+	bl	prepare_ftrace_return
+	nop
+
+	/*
+	 * prepare_ftrace_return gives us the address we divert to.
+	 * Change the LR to this.
+	 */
+	mtlr	r3
+
+	ld	r0, 40(r1)
+	mtctr	r0
+	ld	r10, 104(r1)
+	ld	r9, 96(r1)
+	ld	r8, 88(r1)
+	ld	r7, 80(r1)
+	ld	r6, 72(r1)
+	ld	r5, 64(r1)
+	ld	r4, 56(r1)
+	ld	r3, 48(r1)
+
+	/* Restore callee's TOC */
+	ld	r2, 24(r1)
+
+	addi	r1, r1, 112
+	mflr	r0
+	std	r0, LRSAVE(r1)
+	bctr
+#endif /* CC_USING_MPROFILE_KERNEL */
+
 _GLOBAL(return_to_handler)
 	/* need to save return values */
 	std	r4, -32(r1)
arch/powerpc/kernel/ftrace.c
@@ -61,8 +61,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
 		return -EFAULT;
 
 	/* Make sure it is what we expect it to be */
-	if (replaced != old)
+	if (replaced != old) {
+		pr_err("%p: replaced (%#x) != old (%#x)",
+		       (void *)ip, replaced, old);
 		return -EINVAL;
+	}
 
 	/* replace the text with the new text */
 	if (patch_instruction((unsigned int *)ip, new))
@@ -108,11 +111,13 @@ __ftrace_make_nop(struct module *mod,
 {
 	unsigned long entry, ptr, tramp;
 	unsigned long ip = rec->ip;
-	unsigned int op;
+	unsigned int op, pop;
 
 	/* read where this goes */
-	if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
+	if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
+		pr_err("Fetching opcode failed.\n");
 		return -EFAULT;
+	}
 
 	/* Make sure that that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
@@ -152,10 +157,42 @@ __ftrace_make_nop(struct module *mod,
 	 *
 	 * Use a b +8 to jump over the load.
 	 */
-	op = 0x48000008;	/* b +8 */
+	pop = PPC_INST_BRANCH | 8;	/* b +8 */
+
+	/*
+	 * Check what is in the next instruction. We can see ld r2,40(r1), but
+	 * on first pass after boot we will see mflr r0.
+	 */
+	if (probe_kernel_read(&op, (void *)(ip + 4), MCOUNT_INSN_SIZE)) {
+		pr_err("Fetching op failed.\n");
+		return -EFAULT;
+	}
+
+	if (op != PPC_INST_LD_TOC) {
+		unsigned int inst;
+
+		if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) {
+			pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+			return -EFAULT;
+		}
+
+		/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
+		if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) {
+			pr_err("Unexpected instructions around bl _mcount\n"
+			       "when enabling dynamic ftrace!\t"
+			       "(%08x,bl,%08x)\n", inst, op);
+			return -EINVAL;
+		}
+
+		/* With -mprofile-kernel there is no load to jump over */
+		pop = PPC_INST_NOP;
+	}
 
-	if (patch_instruction((unsigned int *)ip, op))
+	if (patch_instruction((unsigned int *)ip, pop)) {
+		pr_err("Patching NOP failed.\n");
 		return -EPERM;
+	}
 
 	return 0;
 }
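Concretely, the two cases above patch a call site as follows (a sketch; the stub name is hypothetical, and offset 40 is the ELFv1 R2_STACK_OFFSET):

	Old ABI call site:			After __ftrace_make_nop():
		bl	mcount_stub			b	+8
		ld	r2, 40(r1)			ld	r2, 40(r1)

	-mprofile-kernel call site:		After __ftrace_make_nop():
		mflr	r0				mflr	r0
		bl	mcount_stub			nop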
@@ -281,6 +318,39 @@ int ftrace_make_nop(struct module *mod,
 
 #ifdef CONFIG_MODULES
 #ifdef CONFIG_PPC64
+/*
+ * Examine the existing instructions for __ftrace_make_call.
+ * They should effectively be a NOP, and follow formal constraints,
+ * depending on the ABI. Return false if they don't.
+ */
+#ifndef CC_USING_MPROFILE_KERNEL
+static int
+expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
+{
+	/*
+	 * We expect to see:
+	 *
+	 * b +8
+	 * ld r2,XX(r1)
+	 *
+	 * The load offset is different depending on the ABI. For simplicity
+	 * just mask it out when doing the compare.
+	 */
+	if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000))
+		return 0;
+	return 1;
+}
+#else
+static int
+expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
+{
+	/* look for patched "NOP" on ppc64 with -mprofile-kernel */
+	if (op0 != PPC_INST_NOP)
+		return 0;
+	return 1;
+}
+#endif
+
 static int
 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
@@ -291,17 +361,9 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	if (probe_kernel_read(op, ip, sizeof(op)))
 		return -EFAULT;
 
-	/*
-	 * We expect to see:
-	 *
-	 * b +8
-	 * ld r2,XX(r1)
-	 *
-	 * The load offset is different depending on the ABI. For simplicity
-	 * just mask it out when doing the compare.
-	 */
-	if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
-		pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
+	if (!expected_nop_sequence(ip, op[0], op[1])) {
+		pr_err("Unexpected call sequence at %p: %x %x\n",
+		       ip, op[0], op[1]);
 		return -EINVAL;
 	}
 
@@ -324,7 +386,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
 	return 0;
 }
-#else
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+			unsigned long addr)
+{
+	return ftrace_make_call(rec, addr);
+}
+#endif
+
+#else /* !CONFIG_PPC64: */
 static int
 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
arch/powerpc/kernel/module_64.c
@@ -42,7 +42,6 @@
    --RR. */
 
 #if defined(_CALL_ELF) && _CALL_ELF == 2
-#define R2_STACK_OFFSET 24
 
 /* An address is simply the address of the function. */
 typedef unsigned long func_desc_t;
@@ -74,7 +73,6 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym)
 	return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
 }
 #else
-#define R2_STACK_OFFSET 40
 
 /* An address is address of the OPD entry, which contains address of fn. */
 typedef struct ppc64_opd_entry func_desc_t;
@@ -451,17 +449,60 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
 	return (unsigned long)&stubs[i];
 }
 
+#ifdef CC_USING_MPROFILE_KERNEL
+static bool is_early_mcount_callsite(u32 *instruction)
+{
+	/*
+	 * Check if this is one of the -mprofile-kernel sequences.
+	 */
+	if (instruction[-1] == PPC_INST_STD_LR &&
+	    instruction[-2] == PPC_INST_MFLR)
+		return true;
+
+	if (instruction[-1] == PPC_INST_MFLR)
+		return true;
+
+	return false;
+}
+
+/*
+ * In case of _mcount calls, do not save the current callee's TOC (in r2) into
+ * the original caller's stack frame. If we did we would clobber the saved TOC
+ * value of the original caller.
+ */
+static void squash_toc_save_inst(const char *name, unsigned long addr)
+{
+	struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr;
+
+	/* Only for calls to _mcount */
+	if (strcmp("_mcount", name) != 0)
+		return;
+
+	stub->jump[2] = PPC_INST_NOP;
+}
+#else
+static void squash_toc_save_inst(const char *name, unsigned long addr) { }
+
+/* without -mprofile-kernel, mcount calls are never early */
+static bool is_early_mcount_callsite(u32 *instruction)
+{
+	return false;
+}
+#endif
+
 /* We expect a noop next: if it is, replace it with instruction to
    restore r2. */
 static int restore_r2(u32 *instruction, struct module *me)
 {
 	if (*instruction != PPC_INST_NOP) {
+		if (is_early_mcount_callsite(instruction - 1))
+			return 1;
 		pr_err("%s: Expect noop after relocate, got %08x\n",
 		       me->name, *instruction);
 		return 0;
 	}
 	/* ld r2,R2_STACK_OFFSET(r1) */
-	*instruction = 0xe8410000 | R2_STACK_OFFSET;
+	*instruction = PPC_INST_LD_TOC;
 	return 1;
 }
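For orientation, the module stub whose word squash_toc_save_inst() overwrites looks roughly like this (a simplification of ppc64_stub_insns; the exact sequence is ABI-dependent, but jump[2] is the TOC save):

	addis	r11, r2, <hi>		/* jump[0]: locate the stub's data */
	addi	r11, r11, <lo>		/* jump[1] */
	std	r2, R2_STACK_OFFSET(r1)	/* jump[2]: TOC save -- nop'ed for _mcount */
	ld	r12, 32(r11)
	mtctr	r12
	bctr

Skipping the TOC save matters because _mcount is called before the callee sets up its own frame, so the std would land in the original caller's frame and clobber its saved TOC.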
@@ -586,6 +627,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 					return -ENOENT;
 				if (!restore_r2((u32 *)location + 1, me))
 					return -ENOEXEC;
+
+				squash_toc_save_inst(strtab + sym->st_name, value);
 			} else
 				value += local_entry_offset(sym);
 