perf script: Implement --graph-function

Add an ftrace-style --graph-function argument to 'perf script' that
allows printing itrace function calls only below a given function. This
makes it easier to find the code of interest in a large trace.

% perf record -e intel_pt//k -a sleep 1
% perf script --graph-function group_sched_in --call-trace
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])          group_sched_in
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])              __x86_indirect_thunk_rax
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])              event_sched_in.isra.107
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  perf_event_set_state.part.71
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                      perf_event_update_time
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  perf_pmu_disable
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  perf_log_itrace_start
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  __x86_indirect_thunk_rax
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                      perf_event_update_userpage
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                          calc_timer_values
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                              sched_clock_cpu
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                          __x86_indirect_thunk_rax
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                          arch_perf_update_userpage
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                              __fentry__
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                              using_native_sched_clock
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                              sched_clock_stable
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  perf_pmu_enable
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])              __x86_indirect_thunk_rax
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])          group_sched_in
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])              __x86_indirect_thunk_rax
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])              event_sched_in.isra.107
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                  perf_event_set_state.part.71
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                      perf_event_update_time
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                  perf_pmu_disable
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                  perf_log_itrace_start
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                  __x86_indirect_thunk_rax
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                      perf_event_update_userpage
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                          calc_timer_values
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                              sched_clock_cpu
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                          __x86_indirect_thunk_rax
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                          arch_perf_update_userpage
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                              __fentry__
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                              using_native_sched_clock
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                              sched_clock_stable

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Kim Phillips <kim.phillips@arm.com>
Link: http://lkml.kernel.org/r/20180920180540.14039-5-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Andi Kleen 2018-09-20 11:05:39 -07:00 committed by Arnaldo Carvalho de Melo
parent d1b1552e15
commit 99f753f048
4 changed files with 86 additions and 19 deletions

View File

@ -397,6 +397,10 @@ include::itrace.txt[]
--call-ret-trace:: --call-ret-trace::
Show call and return stream for intel_pt traces. Show call and return stream for intel_pt traces.
--graph-function::
For itrace, only show the specified functions and their callees.
Multiple functions can be separated by commas.
SEE ALSO SEE ALSO
-------- --------
linkperf:perf-record[1], linkperf:perf-script-perl[1], linkperf:perf-record[1], linkperf:perf-script-perl[1],

View File

@ -1105,6 +1105,35 @@ static int perf_sample__fprintf_addr(struct perf_sample *sample,
return printed; return printed;
} }
/*
 * Pick the symbol name that describes a branch sample.
 *
 * For call / trace-begin samples the name comes from the location that
 * thread__resolve() fills in (presumably the branch target, sample->addr --
 * confirm against thread__resolve()), and only when
 * sample_addr_correlates_sym() accepts the event attributes.  For
 * return / trace-end samples the name comes from the caller-supplied @al.
 *
 * Returns the symbol name, or NULL when no symbol is available.  When NULL is
 * returned for one of the flag groups above, the raw address is stored in
 * *@ip instead (sample->addr for call/begin, sample->ip for return/end).
 * For samples carrying none of those flags, NULL is returned and *@ip is
 * left untouched.
 */
static const char *resolve_branch_sym(struct perf_sample *sample,
				      struct perf_evsel *evsel,
				      struct thread *thread,
				      struct addr_location *al,
				      u64 *ip)
{
	struct addr_location target_al;

	if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
		if (sample_addr_correlates_sym(&evsel->attr)) {
			thread__resolve(thread, &target_al, sample);
			if (target_al.sym)
				return target_al.sym->name;
		}
		/* No usable symbol: hand back the raw target address. */
		*ip = sample->addr;
		return NULL;
	}

	if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
		if (al->sym)
			return al->sym->name;
		/* No usable symbol: hand back the raw sample ip. */
		*ip = sample->ip;
	}

	return NULL;
}
static int perf_sample__fprintf_callindent(struct perf_sample *sample, static int perf_sample__fprintf_callindent(struct perf_sample *sample,
struct perf_evsel *evsel, struct perf_evsel *evsel,
struct thread *thread, struct thread *thread,
@ -1112,7 +1141,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
{ {
struct perf_event_attr *attr = &evsel->attr; struct perf_event_attr *attr = &evsel->attr;
size_t depth = thread_stack__depth(thread); size_t depth = thread_stack__depth(thread);
struct addr_location addr_al;
const char *name = NULL; const char *name = NULL;
static int spacing; static int spacing;
int len = 0; int len = 0;
@ -1126,22 +1154,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN) if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
depth += 1; depth += 1;
if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) { name = resolve_branch_sym(sample, evsel, thread, al, &ip);
if (sample_addr_correlates_sym(attr)) {
thread__resolve(thread, &addr_al, sample);
if (addr_al.sym)
name = addr_al.sym->name;
else
ip = sample->addr;
} else {
ip = sample->addr;
}
} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
if (al->sym)
name = al->sym->name;
else
ip = sample->ip;
}
if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) { if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
dlen += fprintf(fp, "("); dlen += fprintf(fp, "(");
@ -1647,6 +1660,47 @@ static void perf_sample__fprint_metric(struct perf_script *script,
} }
} }
/*
 * Decide whether a sample should be printed under --graph-function.
 *
 * With no graph function configured every sample is shown.  Otherwise the
 * per-thread state in thread->filter / thread->filter_entry_depth is used:
 *
 *  - While the filter is on, samples are shown as long as the thread stack
 *    depth stays above the depth recorded on entry.  The first sample at or
 *    below that depth switches the filter off and is itself not shown.
 *  - While the filter is off, a sample is shown only if its branch symbol
 *    (see resolve_branch_sym()) exactly matches one entry of the
 *    comma-separated symbol_conf.graph_function list; that match switches
 *    the filter on and records the current depth.
 *
 * Returns true when the sample should be printed.
 */
static bool show_event(struct perf_sample *sample,
		       struct perf_evsel *evsel,
		       struct thread *thread,
		       struct addr_location *al)
{
	int depth = thread_stack__depth(thread);
	const char *entry;
	const char *name;
	unsigned nlen;
	u64 ip;		/* filled by resolve_branch_sym(); not used here */

	if (!symbol_conf.graph_function)
		return true;

	if (thread->filter) {
		if (depth > thread->filter_entry_depth)
			return true;
		/* Back at (or above) the entry level: leave the subtree. */
		thread->filter = false;
		return false;
	}

	name = resolve_branch_sym(sample, evsel, thread, al, &ip);
	if (!name)
		return false;

	nlen = strlen(name);

	/* Walk the comma-separated list one entry at a time. */
	for (entry = symbol_conf.graph_function; *entry; ) {
		unsigned len = strcspn(entry, ",");

		if (len == nlen && !strncmp(name, entry, len)) {
			thread->filter = true;
			thread->filter_entry_depth = depth;
			return true;
		}

		entry += len;
		if (*entry == ',')
			entry++;
	}

	return false;
}
static void process_event(struct perf_script *script, static void process_event(struct perf_script *script,
struct perf_sample *sample, struct perf_evsel *evsel, struct perf_sample *sample, struct perf_evsel *evsel,
struct addr_location *al, struct addr_location *al,
@ -1661,6 +1715,9 @@ static void process_event(struct perf_script *script,
if (output[type].fields == 0) if (output[type].fields == 0)
return; return;
if (!show_event(sample, evsel, thread, al))
return;
++es->samples; ++es->samples;
perf_sample__fprintf_start(sample, thread, evsel, perf_sample__fprintf_start(sample, thread, evsel,
@ -3237,6 +3294,8 @@ int cmd_script(int argc, const char **argv)
"Decode calls from from itrace", parse_call_trace), "Decode calls from from itrace", parse_call_trace),
OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL, OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
"Decode calls and returns from itrace", parse_callret_trace), "Decode calls and returns from itrace", parse_callret_trace),
OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]",
"Only print symbols and callees with --call-trace/--call-ret-trace"),
OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]", OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
"Stop display of callgraph at these symbols"), "Stop display of callgraph at these symbols"),
OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
@ -3494,7 +3553,8 @@ int cmd_script(int argc, const char **argv)
script.session = session; script.session = session;
script__setup_sample_type(&script); script__setup_sample_type(&script);
if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) ||
symbol_conf.graph_function)
itrace_synth_opts.thread_stack = true; itrace_synth_opts.thread_stack = true;
session->itrace_synth_opts = &itrace_synth_opts; session->itrace_synth_opts = &itrace_synth_opts;

View File

@ -123,7 +123,8 @@ struct symbol_conf {
const char *vmlinux_name, const char *vmlinux_name,
*kallsyms_name, *kallsyms_name,
*source_prefix, *source_prefix,
*field_sep; *field_sep,
*graph_function;
const char *default_guest_vmlinux_name, const char *default_guest_vmlinux_name,
*default_guest_kallsyms, *default_guest_kallsyms,
*default_guest_modules; *default_guest_modules;

View File

@ -42,6 +42,8 @@ struct thread {
void *addr_space; void *addr_space;
struct unwind_libunwind_ops *unwind_libunwind_ops; struct unwind_libunwind_ops *unwind_libunwind_ops;
#endif #endif
bool filter;
int filter_entry_depth;
}; };
struct machine; struct machine;