linux/arch/arm64/kernel/stacktrace.c

216 lines
5.1 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Stack tracing support
*
* Copyright (C) 2012 ARM Ltd.
*/
#include <linux/kernel.h>
#include <linux/export.h>
arm64: ftrace: fix a stack tracer's output under function graph tracer Function graph tracer modifies a return address (LR) in a stack frame to hook a function return. This will result in many useless entries (return_to_handler) showing up in a) a stack tracer's output b) perf call graph (with perf record -g) c) dump_backtrace (at panic et al.) For example, in case of a), $ echo function_graph > /sys/kernel/debug/tracing/current_tracer $ echo 1 > /proc/sys/kernel/stack_trace_enabled $ cat /sys/kernel/debug/tracing/stack_trace Depth Size Location (54 entries) ----- ---- -------- 0) 4504 16 gic_raise_softirq+0x28/0x150 1) 4488 80 smp_cross_call+0x38/0xb8 2) 4408 48 return_to_handler+0x0/0x40 3) 4360 32 return_to_handler+0x0/0x40 ... In case of b), $ echo function_graph > /sys/kernel/debug/tracing/current_tracer $ perf record -e mem:XXX:x -ag -- sleep 10 $ perf report ... | | |--0.22%-- 0x550f8 | | | 0x10888 | | | el0_svc_naked | | | sys_openat | | | return_to_handler | | | return_to_handler ... In case of c), $ echo function_graph > /sys/kernel/debug/tracing/current_tracer $ echo c > /proc/sysrq-trigger ... Call trace: [<ffffffc00044d3ac>] sysrq_handle_crash+0x24/0x30 [<ffffffc000092250>] return_to_handler+0x0/0x40 [<ffffffc000092250>] return_to_handler+0x0/0x40 ... This patch replaces such entries with real addresses preserved in current->ret_stack[] at unwind_frame(). This way, we can cover all the cases. Reviewed-by: Jungseok Lee <jungseoklee85@gmail.com> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org> [will: fixed minor context changes conflicting with irq stack bits] Signed-off-by: Will Deacon <will.deacon@arm.com>
2015-12-15 16:33:41 +08:00
#include <linux/ftrace.h>
#include <linux/kprobes.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <asm/irq.h>
#include <asm/pointer_auth.h>
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
/*
* AArch64 PCS assigns the frame pointer to x29.
*
* A simple function prologue looks like this:
* sub sp, sp, #0x10
* stp x29, x30, [sp]
* mov x29, sp
*
* A simple function epilogue looks like this:
* mov sp, x29
* ldp x29, x30, [sp]
* add sp, sp, #0x10
*/
/*
 * Unwind from one frame record (A) to the next frame record (B).
 *
 * We terminate early if the location of B indicates a malformed chain of frame
 * records (e.g. a cycle), determined based on the location and fp value of A
 * and the location (but not the fp value) of B.
 *
 * @tsk:   task whose stack is being walked; NULL is treated as current.
 * @frame: unwind state. On success its fp, pc, prev_fp and prev_type (and
 *         possibly stacks_done and graph) are updated to describe the next
 *         record.
 *
 * Returns 0 on success, -ENOENT when @frame holds a terminal record (fp is
 * NULL), or -EINVAL when the frame pointer is misaligned, is not on a stack
 * accessible to @tsk, points into a stack that has already been left, does
 * not move strictly upwards within the same stack, or when the function
 * graph return stack has no entry for a hooked return address.
 */
int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
{
	unsigned long fp = frame->fp;
	struct stack_info info;

	/*
	 * Terminal record; nothing to unwind. Terminal records (e.g. EL0
	 * exception entry, or the start of a kernel thread) carry a NULL FP.
	 * This would otherwise be caught implicitly by the
	 * on_accessible_stack() check below.
	 */
	if (!fp)
		return -ENOENT;

	/* Reject a frame pointer that is not 16-byte aligned. */
	if (fp & 0xf)
		return -EINVAL;

	/*
	 * Callers may pass a NULL tsk and expect it to mean current, so
	 * resolve it before tsk is used for anything else.
	 */
	if (!tsk)
		tsk = current;

	if (!on_accessible_stack(tsk, fp, &info))
		return -EINVAL;

	if (test_bit(info.type, frame->stacks_done))
		return -EINVAL;

	/*
	 * As stacks grow downward, any valid record on the same stack must be
	 * at a strictly higher address than the prior record.
	 *
	 * Stacks can nest in several valid orders, e.g.
	 *
	 * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
	 * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
	 *
	 * ... but the nesting itself is strict. Once we transition from one
	 * stack to another, it's never valid to unwind back to that first
	 * stack.
	 */
	if (info.type == frame->prev_type) {
		if (fp <= frame->prev_fp)
			return -EINVAL;
	} else {
		set_bit(frame->prev_type, frame->stacks_done);
	}

	/*
	 * Record this frame record's values and location. The prev_fp and
	 * prev_type are only meaningful to the next unwind_frame() invocation.
	 *
	 * The record is read with READ_ONCE_NOCHECK() so that KASAN does not
	 * instrument these loads; unwinding a non-current task's stack
	 * otherwise triggers out-of-bounds reports.
	 */
	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
	frame->prev_fp = fp;
	frame->prev_type = info.type;

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	if (tsk->ret_stack &&
		(ptrauth_strip_insn_pac(frame->pc) == (unsigned long)return_to_handler)) {
		struct ftrace_ret_stack *ret_stack;
		/*
		 * The function graph tracer has replaced the return address
		 * (LR) saved in this frame record with return_to_handler in
		 * order to hook the function return. Report the original
		 * return address, which the tracer preserved in the task's
		 * return stack, so that return_to_handler does not show up
		 * in the trace.
		 */
		ret_stack = ftrace_graph_get_ret_stack(tsk, frame->graph++);
		if (WARN_ON_ONCE(!ret_stack))
			return -EINVAL;
		frame->pc = ret_stack->ret;
	}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

	/* Report the return address with any pointer authentication code removed. */
	frame->pc = ptrauth_strip_insn_pac(frame->pc);

	return 0;
}
NOKPROBE_SYMBOL(unwind_frame);
/*
 * Walk a chain of frame records starting from @frame.
 *
 * For every record (including the initial one) @fn is called with @data and
 * the record's pc. The walk ends as soon as @fn returns false or
 * unwind_frame() returns a negative value.
 */
void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
			     bool (*fn)(void *, unsigned long), void *data)
{
	do {
		/* The consumer decides whether the walk carries on. */
		if (!fn(data, frame->pc))
			break;
	} while (unwind_frame(tsk, frame) >= 0);
}
NOKPROBE_SYMBOL(walk_stackframe);
/*
 * Emit a single backtrace line: @loglvl, a space, then @where formatted
 * with the %pS pointer specifier.
 */
static void dump_backtrace_entry(unsigned long where, const char *loglvl)
{
	void *addr = (void *)where;

	printk("%s %pS\n", loglvl, addr);
}
/*
 * Print a "Call trace:" listing for @tsk at log level @loglvl.
 *
 * @regs:   optional exception registers. If they describe user mode nothing
 *          is printed. Otherwise entries are suppressed until the unwinder
 *          reaches the frame whose fp equals regs->regs[29], at which point
 *          regs->pc is printed and normal output resumes.
 * @tsk:    task to dump; NULL means current.
 * @loglvl: printk log level prefix placed in front of every line.
 */
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
		    const char *loglvl)
{
	struct stackframe frame;
	unsigned long start_fp, start_pc;
	int skipping;

	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);

	if (regs && user_mode(regs))
		return;
	skipping = regs ? 1 : 0;

	if (!tsk)
		tsk = current;

	/* Hold a reference on the task's stack for the duration of the walk. */
	if (!try_get_task_stack(tsk))
		return;

	if (tsk == current) {
		/* Start from this very function's own frame. */
		start_fp = (unsigned long)__builtin_frame_address(0);
		start_pc = (unsigned long)dump_backtrace;
	} else {
		/*
		 * task blocked in __switch_to
		 */
		start_fp = thread_saved_fp(tsk);
		start_pc = thread_saved_pc(tsk);
	}
	start_backtrace(&frame, start_fp, start_pc);

	printk("%sCall trace:\n", loglvl);
	for (;;) {
		if (skipping) {
			/* skip until specified stack frame */
			if (frame.fp == regs->regs[29]) {
				skipping = 0;
				/*
				 * Mostly, this is the case where this
				 * function is called in panic/abort. As the
				 * exception handler's stack frame does not
				 * contain the corresponding pc at which an
				 * exception has taken place, use regs->pc
				 * instead.
				 */
				dump_backtrace_entry(regs->pc, loglvl);
			}
		} else {
			dump_backtrace_entry(frame.pc, loglvl);
		}

		if (unwind_frame(tsk, &frame))
			break;
	}

	put_task_stack(tsk);
}
/*
 * Print the call trace of @tsk at log level @loglvl by delegating to
 * dump_backtrace() without exception registers. @sp is not consulted.
 */
void show_stack(struct task_struct *tsk,
		unsigned long *sp,
		const char *loglvl)
{
	dump_backtrace(NULL, tsk, loglvl);

	/*
	 * NOTE(review): presumably this compiler barrier keeps the call above
	 * from being emitted as a tail call - confirm.
	 */
	barrier();
}
#ifdef CONFIG_STACKTRACE
/*
 * Walk a stack and feed every pc to @consume_entry (with @cookie) until the
 * consumer returns false or the unwinder stops.
 *
 * The starting point is chosen as follows:
 *  - @regs given:      the fp (x29) and pc held in @regs;
 *  - @task is current: this function's own frame;
 *  - otherwise:        the fp/pc saved for @task.
 */
void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
		     struct task_struct *task, struct pt_regs *regs)
{
	struct stackframe frame;
	unsigned long fp, pc;

	if (regs) {
		fp = regs->regs[29];
		pc = regs->pc;
	} else if (task == current) {
		fp = (unsigned long)__builtin_frame_address(0);
		pc = (unsigned long)arch_stack_walk;
	} else {
		fp = thread_saved_fp(task);
		pc = thread_saved_pc(task);
	}

	start_backtrace(&frame, fp, pc);
	walk_stackframe(task, &frame, consume_entry, cookie);
}
#endif