perf script: Implement --graph-function
Add an ftrace-style --graph-function argument to 'perf script' that allows printing itrace function calls only below a given function. This makes it easier to find the code of interest in a large trace.

% perf record -e intel_pt//k -a sleep 1
% perf script --graph-function group_sched_in --call-trace
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) group_sched_in
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) event_sched_in.isra.107
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_set_state.part.71
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_update_time
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_pmu_disable
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_log_itrace_start
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_event_update_userpage
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) calc_timer_values
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) sched_clock_cpu
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) arch_perf_update_userpage
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __fentry__
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) using_native_sched_clock
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) sched_clock_stable
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) perf_pmu_enable
perf 900 [000] 194167.205652203: ([kernel.kallsyms]) __x86_indirect_thunk_rax
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) group_sched_in
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __x86_indirect_thunk_rax
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) event_sched_in.isra.107
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_event_set_state.part.71
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_event_update_time
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_pmu_disable
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_log_itrace_start
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __x86_indirect_thunk_rax
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) perf_event_update_userpage
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) calc_timer_values
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) sched_clock_cpu
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __x86_indirect_thunk_rax
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) arch_perf_update_userpage
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) __fentry__
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) using_native_sched_clock
swapper 0 [001] 194167.205660693: ([kernel.kallsyms]) sched_clock_stable

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Kim Phillips <kim.phillips@arm.com>
Link: http://lkml.kernel.org/r/20180920180540.14039-5-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
d1b1552e15
commit
99f753f048
|
@ -397,6 +397,10 @@ include::itrace.txt[]
|
||||||
--call-ret-trace::
|
--call-ret-trace::
|
||||||
Show call and return stream for intel_pt traces.
|
Show call and return stream for intel_pt traces.
|
||||||
|
|
||||||
|
--graph-function::
|
||||||
|
For itrace, only show the specified functions and their
|
||||||
|
callees. Multiple functions can be separated by commas.
|
||||||
|
|
||||||
SEE ALSO
|
SEE ALSO
|
||||||
--------
|
--------
|
||||||
linkperf:perf-record[1], linkperf:perf-script-perl[1],
|
linkperf:perf-record[1], linkperf:perf-script-perl[1],
|
||||||
|
|
|
@ -1105,6 +1105,35 @@ static int perf_sample__fprintf_addr(struct perf_sample *sample,
|
||||||
return printed;
|
return printed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Pick the symbol name that a branch sample should be reported under.
 *
 * - For samples flagged PERF_IP_FLAG_CALL or PERF_IP_FLAG_TRACE_BEGIN the
 *   name comes from the sample's target: when
 *   sample_addr_correlates_sym(attr) holds, thread__resolve() fills a local
 *   addr_location and its symbol name is used if a symbol was found.
 * - For samples flagged PERF_IP_FLAG_RETURN or PERF_IP_FLAG_TRACE_END the
 *   name comes from the caller-supplied @al.
 *
 * @ip is an out-parameter that is written ONLY when no symbol name could be
 * produced in one of the two branches above (sample->addr for the call
 * case, sample->ip for the return case). It is left untouched when a name
 * is returned or when none of the four flags is set, so callers that read
 * it must initialise it beforehand.
 *
 * Returns the symbol name, or NULL when no symbol was resolved.
 */
static const char *resolve_branch_sym(struct perf_sample *sample,
|
||||||
|
struct perf_evsel *evsel,
|
||||||
|
struct thread *thread,
|
||||||
|
struct addr_location *al,
|
||||||
|
u64 *ip)
|
||||||
|
{
|
||||||
|
struct addr_location addr_al;
|
||||||
|
struct perf_event_attr *attr = &evsel->attr;
|
||||||
|
const char *name = NULL;
|
||||||
|
|
|
||||||
|
/* Call / trace-begin: report the branch target (sample->addr side). */
if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
|
||||||
|
if (sample_addr_correlates_sym(attr)) {
|
||||||
|
thread__resolve(thread, &addr_al, sample);
|
||||||
|
if (addr_al.sym)
|
||||||
|
name = addr_al.sym->name;
|
||||||
|
else
|
||||||
|
*ip = sample->addr;
|
||||||
|
} else {
|
||||||
|
*ip = sample->addr;
|
||||||
|
}
|
||||||
|
} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
|
||||||
|
/* Return / trace-end: report the location already resolved in @al. */
if (al->sym)
|
||||||
|
name = al->sym->name;
|
||||||
|
else
|
||||||
|
*ip = sample->ip;
|
||||||
|
}
|
||||||
|
/* Still NULL here if no branch above found a symbol. */
return name;
|
||||||
|
}
|
||||||
|
|
||||||
static int perf_sample__fprintf_callindent(struct perf_sample *sample,
|
static int perf_sample__fprintf_callindent(struct perf_sample *sample,
|
||||||
struct perf_evsel *evsel,
|
struct perf_evsel *evsel,
|
||||||
struct thread *thread,
|
struct thread *thread,
|
||||||
|
@ -1112,7 +1141,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
|
||||||
{
|
{
|
||||||
struct perf_event_attr *attr = &evsel->attr;
|
struct perf_event_attr *attr = &evsel->attr;
|
||||||
size_t depth = thread_stack__depth(thread);
|
size_t depth = thread_stack__depth(thread);
|
||||||
struct addr_location addr_al;
|
|
||||||
const char *name = NULL;
|
const char *name = NULL;
|
||||||
static int spacing;
|
static int spacing;
|
||||||
int len = 0;
|
int len = 0;
|
||||||
|
@ -1126,22 +1154,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
|
||||||
if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
|
if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
|
||||||
depth += 1;
|
depth += 1;
|
||||||
|
|
||||||
if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
|
name = resolve_branch_sym(sample, evsel, thread, al, &ip);
|
||||||
if (sample_addr_correlates_sym(attr)) {
|
|
||||||
thread__resolve(thread, &addr_al, sample);
|
|
||||||
if (addr_al.sym)
|
|
||||||
name = addr_al.sym->name;
|
|
||||||
else
|
|
||||||
ip = sample->addr;
|
|
||||||
} else {
|
|
||||||
ip = sample->addr;
|
|
||||||
}
|
|
||||||
} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
|
|
||||||
if (al->sym)
|
|
||||||
name = al->sym->name;
|
|
||||||
else
|
|
||||||
ip = sample->ip;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
|
if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
|
||||||
dlen += fprintf(fp, "(");
|
dlen += fprintf(fp, "(");
|
||||||
|
@ -1647,6 +1660,47 @@ static void perf_sample__fprint_metric(struct perf_script *script,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Decide whether a sample should be printed when --graph-function
 * (symbol_conf.graph_function) is in effect.
 *
 * Per-thread state lives in thread->filter and thread->filter_entry_depth:
 *
 * - No graph_function configured: every sample is shown.
 * - thread->filter set (a listed function was matched earlier): samples
 *   are shown until the depth reported by thread_stack__depth() is no
 *   longer greater than the recorded entry depth; that sample clears the
 *   filter and is itself suppressed.
 * - thread->filter clear: the sample's symbol name, as returned by
 *   resolve_branch_sym(), is compared against each comma-separated entry
 *   of graph_function. An exact match (same length, same bytes) turns the
 *   filter on, records the current depth and shows the sample. Anything
 *   else, including an unresolved name, is suppressed.
 *
 * Returns true when the sample should be printed, false otherwise.
 */
static bool show_event(struct perf_sample *sample,
|
||||||
|
struct perf_evsel *evsel,
|
||||||
|
struct thread *thread,
|
||||||
|
struct addr_location *al)
|
||||||
|
{
|
||||||
|
/* NOTE(review): presumably the thread's current call-stack depth; confirm against thread_stack__depth(). */
int depth = thread_stack__depth(thread);
|
||||||
|
|
|
||||||
|
if (!symbol_conf.graph_function)
|
||||||
|
return true;
|
||||||
|
|
|
||||||
|
if (thread->filter) {
|
||||||
|
/* Depth is back at (or below) the recorded entry depth: stop showing. */
if (depth <= thread->filter_entry_depth) {
|
||||||
|
thread->filter = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
const char *s = symbol_conf.graph_function;
|
||||||
|
/* Only passed to satisfy resolve_branch_sym(); never read in this function. */
u64 ip;
|
||||||
|
const char *name = resolve_branch_sym(sample, evsel, thread, al,
|
||||||
|
&ip);
|
||||||
|
unsigned nlen;
|
||||||
|
|
|
||||||
|
if (!name)
|
||||||
|
return false;
|
||||||
|
nlen = strlen(name);
|
||||||
|
/* Walk the comma-separated list one entry at a time. */
while (*s) {
|
||||||
|
unsigned len = strcspn(s, ",");
|
||||||
|
/* Length check first so a list entry that is only a prefix of name does not match. */
if (nlen == len && !strncmp(name, s, len)) {
|
||||||
|
thread->filter = true;
|
||||||
|
thread->filter_entry_depth = depth;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
s += len;
|
||||||
|
if (*s == ',')
|
||||||
|
s++;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void process_event(struct perf_script *script,
|
static void process_event(struct perf_script *script,
|
||||||
struct perf_sample *sample, struct perf_evsel *evsel,
|
struct perf_sample *sample, struct perf_evsel *evsel,
|
||||||
struct addr_location *al,
|
struct addr_location *al,
|
||||||
|
@ -1661,6 +1715,9 @@ static void process_event(struct perf_script *script,
|
||||||
if (output[type].fields == 0)
|
if (output[type].fields == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (!show_event(sample, evsel, thread, al))
|
||||||
|
return;
|
||||||
|
|
||||||
++es->samples;
|
++es->samples;
|
||||||
|
|
||||||
perf_sample__fprintf_start(sample, thread, evsel,
|
perf_sample__fprintf_start(sample, thread, evsel,
|
||||||
|
@ -3237,6 +3294,8 @@ int cmd_script(int argc, const char **argv)
|
||||||
"Decode calls from from itrace", parse_call_trace),
|
"Decode calls from from itrace", parse_call_trace),
|
||||||
OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
|
OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
|
||||||
"Decode calls and returns from itrace", parse_callret_trace),
|
"Decode calls and returns from itrace", parse_callret_trace),
|
||||||
|
OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]",
|
||||||
|
"Only print symbols and callees with --call-trace/--call-ret-trace"),
|
||||||
OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
|
OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
|
||||||
"Stop display of callgraph at these symbols"),
|
"Stop display of callgraph at these symbols"),
|
||||||
OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
|
OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
|
||||||
|
@ -3494,7 +3553,8 @@ int cmd_script(int argc, const char **argv)
|
||||||
script.session = session;
|
script.session = session;
|
||||||
script__setup_sample_type(&script);
|
script__setup_sample_type(&script);
|
||||||
|
|
||||||
if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
|
if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) ||
|
||||||
|
symbol_conf.graph_function)
|
||||||
itrace_synth_opts.thread_stack = true;
|
itrace_synth_opts.thread_stack = true;
|
||||||
|
|
||||||
session->itrace_synth_opts = &itrace_synth_opts;
|
session->itrace_synth_opts = &itrace_synth_opts;
|
||||||
|
|
|
@ -123,7 +123,8 @@ struct symbol_conf {
|
||||||
const char *vmlinux_name,
|
const char *vmlinux_name,
|
||||||
*kallsyms_name,
|
*kallsyms_name,
|
||||||
*source_prefix,
|
*source_prefix,
|
||||||
*field_sep;
|
*field_sep,
|
||||||
|
*graph_function;
|
||||||
const char *default_guest_vmlinux_name,
|
const char *default_guest_vmlinux_name,
|
||||||
*default_guest_kallsyms,
|
*default_guest_kallsyms,
|
||||||
*default_guest_modules;
|
*default_guest_modules;
|
||||||
|
|
|
@ -42,6 +42,8 @@ struct thread {
|
||||||
void *addr_space;
|
void *addr_space;
|
||||||
struct unwind_libunwind_ops *unwind_libunwind_ops;
|
struct unwind_libunwind_ops *unwind_libunwind_ops;
|
||||||
#endif
|
#endif
|
||||||
|
bool filter;
|
||||||
|
int filter_entry_depth;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct machine;
|
struct machine;
|
||||||
|
|
Loading…
Reference in New Issue