From 834fd46ddb50953cf3fd9caa3f35485715c62ea3 Mon Sep 17 00:00:00 2001
From: Milian Wolff
Date: Thu, 6 Aug 2015 11:24:29 +0200
Subject: [PATCH 01/26] perf trace: Add total time column to summary.

It is cumbersome to manually calculate the total time spent in a given
syscall by multiplying the average value by the number of calls. Instead,
we now do this directly inside perf trace.

Note that this is also done by 'strace', which even adds a column with
relative numbers - something we could do in the future.

Example:

  perf trace -s find /some/folder > /dev/null

  Summary of events:

  find (19976), 700123 events, 100.0%, 0.000 msec

   syscall            calls     total       min       avg       max      stddev
                                (msec)    (msec)    (msec)    (msec)        (%)
   --------------- -------- --------- --------- --------- ---------     ------
   read                   4     0.006     0.001     0.002     0.003     27.42%
   write               8046     9.617     0.001     0.001     0.035      0.56%
   open               34196    40.384     0.001     0.001     0.071      0.30%
   close              68375    57.104     0.001     0.001     0.076      0.25%
   stat                   4     0.004     0.001     0.001     0.001      3.14%
   fstat              34189    27.518     0.001     0.001     0.060      0.34%
   mmap                  13     0.029     0.001     0.002     0.003     10.74%
   mprotect               6     0.018     0.002     0.003     0.005     17.04%
   munmap                 3     0.014     0.003     0.005     0.006     24.87%
   brk                   87     0.490     0.001     0.006     0.016      6.50%
   ioctl                  3     0.004     0.001     0.001     0.003     36.39%
   access                 1     0.004     0.004     0.004     0.004      0.00%
   uname                  1     0.001     0.001     0.001     0.001      0.00%
   getdents           68393   143.600     0.001     0.002     0.187      0.95%
   fchdir             68371    56.980     0.001     0.001     0.111      0.39%
   arch_prctl             1     0.001     0.001     0.001     0.001      0.00%
   openat             34184    41.737     0.001     0.001     0.102      0.41%
   newfstatat         34184    41.180     0.001     0.001     0.064      0.34%

Signed-off-by: Milian Wolff
Tested-by: Arnaldo Carvalho de Melo
LPU-Reference: 1438853069-5902-1-git-send-email-milian.wolff@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/builtin-trace.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a47497011c93..a25048c85b76 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2773,9 +2773,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,

 	printed += fprintf(fp, "\n");

-	printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
-	printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
-	printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
+	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
+	printed += fprintf(fp, "                              (msec)    (msec)    (msec)    (msec)        (%%)\n");
+	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");

 	/* each int_node is a syscall */
 	while (inode) {
@@ -2792,8 +2792,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
 			sc = &trace->syscalls.table[inode->i];
 			printed += fprintf(fp, "   %-15s", sc->name);
-			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
-					   n, min, avg);
+			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
+					   n, avg * n, min, avg);
 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
 		}

From 098d2164e3441c252eaa28906d45e16b7bf1bd2b Mon Sep 17 00:00:00 2001
From: Wang Nan
Date: Wed, 1 Jul 2015 02:13:49 +0000
Subject: [PATCH 02/26] bpf: Use correct #ifdef controller for trace_call_bpf()

Commit e1abf2cc8d5d80b41c4419368ec743ccadbb131e ("bpf: Fix the build on
BPF_SYSCALL=y && !CONFIG_TRACING kernels, make it more configurable")
updated the build condition of bpf_trace.o from CONFIG_BPF_SYSCALL to
CONFIG_BPF_EVENTS, but the corresponding #ifdef controller in
trace_events.h for trace_call_bpf() was not changed, which, in theory,
is incorrect.
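To illustrate, a minimal sketch of the failure mode, with hypothetical
names (foo.h, foo.o, CONFIG_A and CONFIG_B are illustrations, not from
the kernel tree): the header guards the extern declaration with one
config symbol while the object file is gated by another.

    /* foo.h: the guard should be CONFIG_B, which is what gates foo.o */
    #ifdef CONFIG_A
    unsigned int foo(void *ctx);
    #else
    static inline unsigned int foo(void *ctx) { return 0; }
    #endif

    /* caller.c, built with CONFIG_A=y and CONFIG_B=n: foo() is declared
     * extern, but foo.o is never compiled, so this call survives until
     * link time and fails with an undefined reference. */
    unsigned int call_foo(void *ctx)
    {
            return foo(ctx);
    }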
With current Kconfigs, we can create a .config with CONFIG_BPF_SYSCALL=y
and CONFIG_BPF_EVENTS=n by unselecting CONFIG_KPROBE_EVENT and selecting
CONFIG_BPF_SYSCALL. With these options, trace_call_bpf() is declared as
an extern function, but if anything calls it, a missing-symbol error is
triggered at link time, since bpf_trace.o was never built.

This patch changes the #ifdef controller for trace_call_bpf() from
CONFIG_BPF_SYSCALL to CONFIG_BPF_EVENTS. The tables below show why this
is correct.

Before this patch:

   BPF_SYSCALL   BPF_EVENTS   trace_call_bpf   bpf_trace.o
   y             y            normal           compiled
   n             n            inline           not compiled
   y             n            normal           not compiled (incorrect)
   n             y            impossible (BPF_EVENTS depends on BPF_SYSCALL)

After this patch:

   BPF_SYSCALL   BPF_EVENTS   trace_call_bpf   bpf_trace.o
   y             y            normal           compiled
   n             n            inline           not compiled
   y             n            inline           not compiled (fixed)
   n             y            impossible (BPF_EVENTS depends on BPF_SYSCALL)

So this patch doesn't break anything. QED.

Signed-off-by: Wang Nan
Cc: Alexei Starovoitov
Cc: Brendan Gregg
Cc: Daniel Borkmann
Cc: David Ahern
Cc: He Kuang
Cc: Jiri Olsa
Cc: Kaixu Xia
Cc: Masami Hiramatsu
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Zefan Li
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1435716878-189507-2-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 include/linux/trace_events.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 1063c850dbab..180dbf8720f9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -542,7 +542,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file,
 	event_triggers_post_call(file, tt);
 }

-#ifdef CONFIG_BPF_SYSCALL
+#ifdef CONFIG_BPF_EVENTS
 unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
 #else
 static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)

From 04a22fae4cbc1f7d3f7471e9b36359f98bd3f043 Mon Sep 17 00:00:00 2001
From: Wang Nan
Date: Wed, 1 Jul 2015 02:13:50 +0000
Subject: [PATCH 03/26] tracing, perf: Implement BPF programs attached to uprobes

By copying the BPF-related operations to the uprobe processing path,
this patch allows users to attach BPF programs to uprobes, just as they
can already do with kprobes. After this patch, users are allowed to use
PERF_EVENT_IOC_SET_BPF on a uprobe perf event, which makes it possible
to profile user space programs and kernel events together using BPF.

Because of this patch, CONFIG_BPF_EVENTS should be selected by
CONFIG_UPROBE_EVENT to ensure trace_call_bpf() is compiled even if
KPROBE_EVENT is not set.
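A minimal user-space sketch of the new capability (hypothetical helper;
event_fd is assumed to be a uprobe perf event opened via
perf_event_open() and prog_fd a BPF program fd returned by
bpf(BPF_PROG_LOAD, ...); error handling omitted):

    #include <linux/perf_event.h>
    #include <sys/ioctl.h>

    /* Before this patch, the kernel rejected this ioctl with -EINVAL
     * for anything but kprobe events; now uprobe events are accepted
     * as well. */
    static int attach_bpf_to_uprobe(int event_fd, int prog_fd)
    {
            return ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
    }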
Signed-off-by: Wang Nan Acked-by: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: David Ahern Cc: He Kuang Cc: Jiri Olsa Cc: Kaixu Xia Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1435716878-189507-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/trace_events.h | 5 +++++ kernel/events/core.c | 4 ++-- kernel/trace/Kconfig | 2 +- kernel/trace/trace_uprobe.c | 5 +++++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 180dbf8720f9..ed27917cabc9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -243,6 +243,7 @@ enum { TRACE_EVENT_FL_USE_CALL_FILTER_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, TRACE_EVENT_FL_KPROBE_BIT, + TRACE_EVENT_FL_UPROBE_BIT, }; /* @@ -257,6 +258,7 @@ enum { * USE_CALL_FILTER - For trace internal events, don't use file filter * TRACEPOINT - Event is a tracepoint * KPROBE - Event is a kprobe + * UPROBE - Event is a uprobe */ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -267,8 +269,11 @@ enum { TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), + TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), }; +#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE) + struct trace_event_call { struct list_head list; struct trace_event_class *class; diff --git a/kernel/events/core.c b/kernel/events/core.c index bdea12924b11..77f9e5d0e2d1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6846,8 +6846,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) if (event->tp_event->prog) return -EEXIST; - if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) - /* bpf programs can only be attached to kprobes */ + if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE)) + /* bpf programs can only be attached to u/kprobes */ return -EINVAL; prog = bpf_prog_get(prog_fd); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3b9a48ae153a..1153c43428f3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -434,7 +434,7 @@ config UPROBE_EVENT config BPF_EVENTS depends on BPF_SYSCALL - depends on KPROBE_EVENT + depends on KPROBE_EVENT || UPROBE_EVENT bool default y help diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index aa1ea7b36fa8..f97479f1ce35 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1095,11 +1095,15 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, { struct trace_event_call *call = &tu->tp.call; struct uprobe_trace_entry_head *entry; + struct bpf_prog *prog = call->prog; struct hlist_head *head; void *data; int size, esize; int rctx; + if (prog && !trace_call_bpf(prog, regs)) + return; + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; @@ -1289,6 +1293,7 @@ static int register_uprobe_event(struct trace_uprobe *tu) return -ENODEV; } + call->flags = TRACE_EVENT_FL_UPROBE; call->class->reg = trace_uprobe_register; call->data = tu; ret = trace_add_event_call(call); From 0af0885ef69c182d1fa6bb201cd0570e9aa384eb Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 31 Jul 2015 10:35:33 -0300 Subject: [PATCH 04/26] perf tools: Introduce veprintf va_args alternative to eprintf(). 
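This lets callers that already hold a va_list forward it into perf's
debug output machinery. A hypothetical wrapper (my_pr_debug() is an
illustration, not part of this patch) could look like:

    #include <stdarg.h>
    #include "util/debug.h"

    /* Forward our own varargs to veprintf() instead of duplicating
     * _eprintf(), which is static to debug.c; 'verbose' is perf's
     * global verbosity level. */
    static int my_pr_debug(const char *fmt, ...)
    {
            va_list args;
            int ret;

            va_start(args, fmt);
            ret = veprintf(1, verbose, fmt, args);
            va_end(args);
            return ret;
    }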
Signed-off-by: Wang Nan
Acked-by: Alexei Starovoitov
Cc: Brendan Gregg
Cc: Daniel Borkmann
Cc: David Ahern
Cc: He Kuang
Cc: Jiri Olsa
Cc: Kaixu Xia
Cc: Masami Hiramatsu
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Zefan Li
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/n/1436445342-1402-19-git-send-email-wangnan0@huawei.com
[ split from another patch ]
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/debug.c | 5 +++++
 tools/perf/util/debug.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 2da5581ec74d..86d9c7302598 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
 	return ret;
 }

+int veprintf(int level, int var, const char *fmt, va_list args)
+{
+	return _eprintf(level, var, fmt, args);
+}
+
 int eprintf(int level, int var, const char *fmt, ...)
 {
 	va_list args;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index caac2fdc6105..8b9a088c32ab 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...);

 int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
 int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
+int veprintf(int level, int var, const char *fmt, va_list args);

 int perf_debug_option(const char *str);

From 5a023b57a8e96327925a39312bccc443a7c540b6 Mon Sep 17 00:00:00 2001
From: Wang Nan
Date: Fri, 19 Jun 2015 08:42:48 +0000
Subject: [PATCH 05/26] perf tools: Add missing forward declaration of struct map to probe-event.h

Commit 7b6ff0bdbf4f7f429c2116cca92a6d171217449e ("perf probe ppc64le:
Fixup function entry if using kallsyms lookup") adds 'struct map' to
probe-event.h but does not forward declare it. This patch fixes that.

Signed-off-by: Wang Nan
Cc: Alexei Starovoitov
Cc: Brendan Gregg
Cc: Daniel Borkmann
Cc: David Ahern
Cc: He Kuang
Cc: Jiri Olsa
Cc: Kaixu Xia
Cc: Masami Hiramatsu
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Zefan Li
Cc: pi3orama@163.com
Fixes: 7b6ff0bdbf4f ("perf probe ppc64le: Fixup function entry if using kallsyms lookup")
Link: http://lkml.kernel.org/n/1436445342-1402-30-git-send-email-wangnan0@huawei.com
[ No need to include map.h, just forward declare 'struct map' ]
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/probe-event.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 20f555d1ae1c..83ee95e9743b 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -106,6 +106,8 @@ struct variable_list {
 	struct strlist *vars; /* Available variables */
 };

+struct map;
+
 /* Command string to events */
 extern int parse_perf_probe_command(const char *cmd,
 				    struct perf_probe_event *pev);

From 421a50f3fafaf271bb3293378eaafca71337dfec Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Tue, 21 Jul 2015 14:31:22 +0200
Subject: [PATCH 06/26] perf stat: Introduce struct perf_stat_config

Moving 'aggr_mode' into a new struct. The point is to centralize the
base stat config so it could be used locally together with other stat
routines in other parts of the perf code.
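The pattern the series works toward can be sketched as follows (the
consumer function is hypothetical, not part of this patch): stat helpers
take one explicit config argument instead of reaching for file-local
globals in builtin-stat.c.

    #include <stdio.h>
    #include "util/stat.h"

    /* Hypothetical consumer: everything it needs to know about how the
     * counts were collected travels in the config argument. */
    static void print_aggr_mode(struct perf_stat_config *config, FILE *fp)
    {
            if (config->aggr_mode == AGGR_GLOBAL)
                    fprintf(fp, "counts aggregated globally\n");
    }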
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1437481927-29538-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 39 ++++++++++++++++++++++----------------- tools/perf/util/stat.h | 4 ++++ 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d99d850e1444..bafb830b1bd9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -102,7 +102,6 @@ static struct target target = { static int run_count = 1; static bool no_inherit = false; static bool scale = true; -static enum aggr_mode aggr_mode = AGGR_GLOBAL; static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; @@ -126,6 +125,10 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu); static volatile int done = 0; +static struct perf_stat_config stat_config = { + .aggr_mode = AGGR_GLOBAL, +}; + static inline void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b) { @@ -230,7 +233,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, if (skip) count = &zero; - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_THREAD: case AGGR_CORE: case AGGR_SOCKET: @@ -238,7 +241,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, if (!evsel->snapshot) perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, scale, NULL); - if (aggr_mode == AGGR_NONE) + if (stat_config.aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->values, cpu); break; case AGGR_GLOBAL: @@ -291,7 +294,7 @@ static int process_counter(struct perf_evsel *counter) if (ret) return ret; - if (aggr_mode != AGGR_GLOBAL) + if (stat_config.aggr_mode != AGGR_GLOBAL) return 0; if (!counter->snapshot) @@ -578,7 +581,7 @@ static void print_noise(struct perf_evsel *evsel, double avg) static void aggr_printout(struct perf_evsel *evsel, int id, int nr) { - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_CORE: fprintf(output, "S%d-C%*d%s%*d%s", cpu_map__id_to_socket(id), @@ -670,7 +673,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) aggr_printout(evsel, id, nr); - if (aggr_mode == AGGR_GLOBAL) + if (stat_config.aggr_mode == AGGR_GLOBAL) cpu = 0; fprintf(output, fmt, avg, csv_sep); @@ -688,7 +691,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (csv_output || interval) return; - perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); + perf_stat__print_shadow_stats(output, evsel, avg, cpu, + stat_config.aggr_mode); } static void print_aggr(char *prefix) @@ -909,7 +913,7 @@ static void print_interval(char *prefix, struct timespec *ts) sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); if (num_print_interval == 0 && !csv_output) { - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); break; @@ -985,7 +989,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) else print_header(argc, argv); - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: print_aggr(prefix); @@ -1064,7 +1068,7 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, static int perf_stat_init_aggr_mode(void) { - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case 
AGGR_SOCKET:
 		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
 			perror("cannot build socket map");
@@ -1286,7 +1290,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		     stat__set_big_num),
 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
 		    "list of cpus to monitor in system-wide"),
-	OPT_SET_UINT('A', "no-aggr", &aggr_mode,
+	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
 		    "disable CPU count aggregation", AGGR_NONE),
 	OPT_STRING('x', "field-separator", &csv_sep, "separator",
 		   "print counts with custom separator"),
@@ -1302,11 +1306,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "command to run after to the measured command"),
 	OPT_UINTEGER('I', "interval-print", &interval,
 		    "print counts at regular interval in ms (>= 100)"),
-	OPT_SET_UINT(0, "per-socket", &aggr_mode,
+	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
 		     "aggregate counts per processor socket", AGGR_SOCKET),
-	OPT_SET_UINT(0, "per-core", &aggr_mode,
+	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
 		     "aggregate counts per physical processor core", AGGR_CORE),
-	OPT_SET_UINT(0, "per-thread", &aggr_mode,
+	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
 		     "aggregate counts per thread", AGGR_THREAD),
 	OPT_UINTEGER('D', "delay", &initial_delay,
 		     "ms to wait before starting measurement after program start"),
@@ -1399,7 +1403,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		run_count = 1;
 	}

-	if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
+	if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
 		fprintf(stderr, "The --per-thread option is only available "
 			"when monitoring via -p -t options.\n");
 		parse_options_usage(NULL, options, "p", 1);
@@ -1411,7 +1415,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	 * no_aggr, cgroup are for system-wide only
 	 * --per-thread is aggregated per thread, we dont mix it with cpu mode
 	 */
-	if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) &&
+	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
+	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
 	    !target__has_cpu(&target)) {
 		fprintf(stderr, "both cgroup and no-aggregation "
 			"modes only available in system-wide mode\n");
@@ -1444,7 +1449,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	 * Initialize thread_map with comm names,
 	 * so we could print it out on output.
 	 */
-	if (aggr_mode == AGGR_THREAD)
+	if (stat_config.aggr_mode == AGGR_THREAD)
 		thread_map__read_comms(evsel_list->threads);

 	if (interval && interval < 100) {
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 1cfbe0a980ac..078bee49ccad 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -50,6 +50,10 @@ struct perf_counts {
 	struct xyarray *values;
 };

+struct perf_stat_config {
+	enum aggr_mode aggr_mode;
+};
+
 static inline struct perf_counts_values*
 perf_counts(struct perf_counts *counts, int cpu, int thread)
 {

From 711a572ea8ae7e9ab6575403c6d632d058d5cb3d Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Tue, 21 Jul 2015 14:31:23 +0200
Subject: [PATCH 07/26] perf stat: Move 'scale' into struct perf_stat_config

Moving 'scale' into struct perf_stat_config. The point is to centralize
the base stat config so it could be used locally together with other
stat routines in other parts of the perf code.
Signed-off-by: Jiri Olsa
Cc: David Ahern
Cc: Namhyung Kim
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1437481927-29538-4-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/builtin-stat.c | 12 ++++++------
 tools/perf/util/stat.h | 1 +
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index bafb830b1bd9..3fb2865e519a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -101,7 +101,6 @@ static struct target target = {

 static int run_count = 1;
 static bool no_inherit = false;
-static bool scale = true;
 static volatile pid_t child_pid = -1;
 static bool null_run = false;
 static int detailed_run = 0;
@@ -127,6 +126,7 @@ static volatile int done = 0;

 static struct perf_stat_config stat_config = {
 	.aggr_mode = AGGR_GLOBAL,
+	.scale = true,
 };

 static inline void diff_timespec(struct timespec *r, struct timespec *a,
@@ -151,7 +151,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
 {
 	struct perf_event_attr *attr = &evsel->attr;

-	if (scale)
+	if (stat_config.scale)
 		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
 				    PERF_FORMAT_TOTAL_TIME_RUNNING;

@@ -240,13 +240,13 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
 	case AGGR_NONE:
 		if (!evsel->snapshot)
 			perf_evsel__compute_deltas(evsel, cpu, thread, count);
-		perf_counts_values__scale(count, scale, NULL);
+		perf_counts_values__scale(count, stat_config.scale, NULL);
 		if (stat_config.aggr_mode == AGGR_NONE)
 			perf_stat__update_shadow_stats(evsel, count->values, cpu);
 		break;
 	case AGGR_GLOBAL:
 		aggr->val += count->val;
-		if (scale) {
+		if (stat_config.scale) {
 			aggr->ena += count->ena;
 			aggr->run += count->run;
 		}
@@ -299,7 +299,7 @@ static int process_counter(struct perf_evsel *counter)

 	if (!counter->snapshot)
 		perf_evsel__compute_deltas(counter, -1, -1, aggr);
-	perf_counts_values__scale(aggr, scale, &counter->counts->scaled);
+	perf_counts_values__scale(aggr, stat_config.scale, &counter->counts->scaled);

 	for (i = 0; i < 3; i++)
 		update_stats(&ps->res_stats[i], count[i]);
@@ -1274,7 +1274,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "system-wide collection from all CPUs"),
 	OPT_BOOLEAN('g', "group", &group,
 		    "put the counters into a counter group"),
-	OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
+	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_INTEGER('r', "repeat", &run_count,
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 078bee49ccad..0a1d83faa7b9 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -52,6 +52,7 @@ struct perf_counts {

 struct perf_stat_config {
 	enum aggr_mode aggr_mode;
+	bool scale;
 };

 static inline struct perf_counts_values*

From 5821522e9484a8b503f89aa546085900b99589e9 Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Tue, 21 Jul 2015 14:31:24 +0200
Subject: [PATCH 08/26] perf stat: Move 'output' into struct perf_stat_config

Moving 'output' into struct perf_stat_config. The point is to centralize
the base stat config so it could be used locally together with other
stat routines in other parts of the perf code.
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1437481927-29538-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 35 +++++++++++++++++++++++------------ tools/perf/util/stat.h | 1 + 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3fb2865e519a..e3ea8b67703d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -110,7 +110,6 @@ static int big_num_opt = -1; static const char *csv_sep = NULL; static bool csv_output = false; static bool group = false; -static FILE *output = NULL; static const char *pre_cmd = NULL; static const char *post_cmd = NULL; static bool sync_run = false; @@ -305,7 +304,7 @@ static int process_counter(struct perf_evsel *counter) update_stats(&ps->res_stats[i], count[i]); if (verbose) { - fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + fprintf(stat_config.output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", perf_evsel__name(counter), count[0], count[1], count[2]); } @@ -548,13 +547,13 @@ static int run_perf_stat(int argc, const char **argv) static void print_running(u64 run, u64 ena) { if (csv_output) { - fprintf(output, "%s%" PRIu64 "%s%.2f", + fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", csv_sep, run, csv_sep, ena ? 100.0 * run / ena : 100.0); } else if (run != ena) { - fprintf(output, " (%.2f%%)", 100.0 * run / ena); + fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); } } @@ -563,9 +562,9 @@ static void print_noise_pct(double total, double avg) double pct = rel_stddev_stats(total, avg); if (csv_output) - fprintf(output, "%s%.2f%%", csv_sep, pct); + fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); else if (pct) - fprintf(output, " ( +-%6.2f%% )", pct); + fprintf(stat_config.output, " ( +-%6.2f%% )", pct); } static void print_noise(struct perf_evsel *evsel, double avg) @@ -583,7 +582,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) { switch (stat_config.aggr_mode) { case AGGR_CORE: - fprintf(output, "S%d-C%*d%s%*d%s", + fprintf(stat_config.output, "S%d-C%*d%s%*d%s", cpu_map__id_to_socket(id), csv_output ? 0 : -8, cpu_map__id_to_cpu(id), @@ -593,7 +592,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_SOCKET: - fprintf(output, "S%*d%s%*d%s", + fprintf(stat_config.output, "S%*d%s%*d%s", csv_output ? 0 : -5, id, csv_sep, @@ -602,12 +601,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_NONE: - fprintf(output, "CPU%*d%s", + fprintf(stat_config.output, "CPU%*d%s", csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[id], csv_sep); break; case AGGR_THREAD: - fprintf(output, "%*s-%*d%s", + fprintf(stat_config.output, "%*s-%*d%s", csv_output ? 0 : 16, thread_map__comm(evsel->threads, id), csv_output ? 
0 : -8, @@ -622,6 +621,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { + FILE *output = stat_config.output; double msecs = avg / 1e6; const char *fmt_v, *fmt_n; char name[25]; @@ -658,6 +658,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { + FILE *output = stat_config.output; double sc = evsel->scale; const char *fmt; int cpu = cpu_map__id_to_cpu(id); @@ -697,6 +698,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) static void print_aggr(char *prefix) { + FILE *output = stat_config.output; struct perf_evsel *counter; int cpu, cpu2, s, s2, id, nr; double uval; @@ -765,6 +767,7 @@ static void print_aggr(char *prefix) static void print_aggr_thread(struct perf_evsel *counter, char *prefix) { + FILE *output = stat_config.output; int nthreads = thread_map__nr(counter->threads); int ncpus = cpu_map__nr(counter->cpus); int cpu, thread; @@ -803,6 +806,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) */ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) { + FILE *output = stat_config.output; struct perf_stat *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; @@ -854,6 +858,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) */ static void print_counter(struct perf_evsel *counter, char *prefix) { + FILE *output = stat_config.output; u64 ena, run, val; double uval; int cpu; @@ -908,6 +913,7 @@ static void print_counter(struct perf_evsel *counter, char *prefix) static void print_interval(char *prefix, struct timespec *ts) { + FILE *output = stat_config.output; static int num_print_interval; sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); @@ -938,6 +944,7 @@ static void print_interval(char *prefix, struct timespec *ts) static void print_header(int argc, const char **argv) { + FILE *output = stat_config.output; int i; fflush(stdout); @@ -967,6 +974,8 @@ static void print_header(int argc, const char **argv) static void print_footer(void) { + FILE *output = stat_config.output; + if (!null_run) fprintf(output, "\n"); fprintf(output, " %17.9f seconds time elapsed", @@ -1013,7 +1022,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (!interval && !csv_output) print_footer(); - fflush(output); + fflush(stat_config.output); } static volatile int signr = -1; @@ -1322,6 +1331,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) }; int status = -EINVAL, run_idx; const char *mode; + FILE *output = stderr; setlocale(LC_ALL, ""); @@ -1332,7 +1342,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, options, stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); - output = stderr; if (output_name && strcmp(output_name, "-")) output = NULL; @@ -1369,6 +1378,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } } + stat_config.output = output; + if (csv_sep) { csv_output = true; if (!strcmp(csv_sep, "\\t")) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0a1d83faa7b9..ed0e05829cb0 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -53,6 +53,7 @@ struct perf_counts { struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; + FILE *output; }; 
static inline struct perf_counts_values*

From ec0d3d1fd292adb80372193c03d859e9cbefd367 Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Tue, 21 Jul 2015 14:31:25 +0200
Subject: [PATCH 09/26] perf stat: Move 'interval' into struct perf_stat_config

Moving 'interval' into struct perf_stat_config. The point is to
centralize the base stat config so it could be used locally together
with other stat routines in other parts of the perf code.

Signed-off-by: Jiri Olsa
Cc: David Ahern
Cc: Namhyung Kim
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1437481927-29538-6-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/builtin-stat.c | 14 +++++++++-----
 tools/perf/util/stat.h | 1 +
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e3ea8b67703d..1bdfec8f5fe6 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -113,7 +113,6 @@ static bool group = false;
 static const char *pre_cmd = NULL;
 static const char *post_cmd = NULL;
 static bool sync_run = false;
-static unsigned int interval = 0;
 static unsigned int initial_delay = 0;
 static unsigned int unit_width = 4; /* strlen("unit") */
 static bool forever = false;
@@ -404,6 +403,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf

 static int __run_perf_stat(int argc, const char **argv)
 {
+	int interval = stat_config.interval;
 	char msg[512];
 	unsigned long long t0, t1;
 	struct perf_evsel *counter;
@@ -646,7 +646,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

-	if (csv_output || interval)
+	if (csv_output || stat_config.interval)
 		return;

 	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -689,7 +689,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

-	if (csv_output || interval)
+	if (csv_output || stat_config.interval)
 		return;

 	perf_stat__print_shadow_stats(output, evsel, avg, cpu,
@@ -990,6 +990,7 @@ static void print_footer(void)

 static void print_counters(struct timespec *ts, int argc, const char **argv)
 {
+	int interval = stat_config.interval;
 	struct perf_evsel *counter;
 	char buf[64], *prefix = NULL;

@@ -1029,7 +1030,7 @@ static volatile int signr = -1;

 static void skip_signal(int signo)
 {
-	if ((child_pid == -1) || interval)
+	if ((child_pid == -1) || stat_config.interval)
 		done = 1;

 	signr = signo;
@@ -1313,7 +1314,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "command to run prior to the measured command"),
 	OPT_STRING(0, "post", &post_cmd, "command",
 		    "command to run after to the measured command"),
-	OPT_UINTEGER('I', "interval-print", &interval,
+	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
 		    "print counts at regular interval in ms (>= 100)"),
 	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
 		     "aggregate counts per processor socket", AGGR_SOCKET),
@@ -1332,6 +1333,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	int status = -EINVAL, run_idx;
 	const char *mode;
 	FILE *output = stderr;
+	unsigned int interval;

 	setlocale(LC_ALL, "");

@@ -1342,6 +1344,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	argc = parse_options(argc, argv, options, stat_usage,
 		PARSE_OPT_STOP_AT_NON_OPTION);

+	interval = stat_config.interval;
+
 	if (output_name && strcmp(output_name, "-"))
 		output =
NULL; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index ed0e05829cb0..1da706d848fb 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -54,6 +54,7 @@ struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; FILE *output; + unsigned int interval; }; static inline struct perf_counts_values* From 5e5fe748bec771a810b1f44ec9c19e4b92685246 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 21 Jul 2015 14:31:26 +0200 Subject: [PATCH 10/26] perf stat: Pass 'struct perf_stat_config' into process_counter() Passing 'struct perf_stat_config' into process_counter(), so that we can make process_counter() non static and use it from other places. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1437481927-29538-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1bdfec8f5fe6..5a781718c09f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -216,7 +216,8 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) } static int -process_counter_values(struct perf_evsel *evsel, int cpu, int thread, +process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel, + int cpu, int thread, struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; @@ -231,20 +232,20 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, if (skip) count = &zero; - switch (stat_config.aggr_mode) { + switch (config->aggr_mode) { case AGGR_THREAD: case AGGR_CORE: case AGGR_SOCKET: case AGGR_NONE: if (!evsel->snapshot) perf_evsel__compute_deltas(evsel, cpu, thread, count); - perf_counts_values__scale(count, stat_config.scale, NULL); - if (stat_config.aggr_mode == AGGR_NONE) + perf_counts_values__scale(count, config->scale, NULL); + if (config->aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->values, cpu); break; case AGGR_GLOBAL: aggr->val += count->val; - if (stat_config.scale) { + if (config->scale) { aggr->ena += count->ena; aggr->run += count->run; } @@ -255,7 +256,8 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, return 0; } -static int process_counter_maps(struct perf_evsel *counter) +static int process_counter_maps(struct perf_stat_config *config, + struct perf_evsel *counter) { int nthreads = thread_map__nr(counter->threads); int ncpus = perf_evsel__nr_cpus(counter); @@ -266,7 +268,7 @@ static int process_counter_maps(struct perf_evsel *counter) for (thread = 0; thread < nthreads; thread++) { for (cpu = 0; cpu < ncpus; cpu++) { - if (process_counter_values(counter, cpu, thread, + if (process_counter_values(config, counter, cpu, thread, perf_counts(counter->counts, cpu, thread))) return -1; } @@ -275,7 +277,8 @@ static int process_counter_maps(struct perf_evsel *counter) return 0; } -static int process_counter(struct perf_evsel *counter) +static int process_counter(struct perf_stat_config *config, + struct perf_evsel *counter) { struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat *ps = counter->priv; @@ -288,22 +291,22 @@ static int process_counter(struct perf_evsel *counter) if (counter->per_pkg) zero_per_pkg(counter); - ret = process_counter_maps(counter); + ret = process_counter_maps(&stat_config, counter); if (ret) return ret; - if 
(stat_config.aggr_mode != AGGR_GLOBAL) + if (config->aggr_mode != AGGR_GLOBAL) return 0; if (!counter->snapshot) perf_evsel__compute_deltas(counter, -1, -1, aggr); - perf_counts_values__scale(aggr, stat_config.scale, &counter->counts->scaled); + perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); for (i = 0; i < 3; i++) update_stats(&ps->res_stats[i], count[i]); if (verbose) { - fprintf(stat_config.output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", perf_evsel__name(counter), count[0], count[1], count[2]); } @@ -352,7 +355,7 @@ static void read_counters(bool close_counters) if (read_counter(counter)) pr_warning("failed to read counter %s\n", counter->name); - if (process_counter(counter)) + if (process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); if (close_counters) { From f80010eb230b94e8d9cf5bf83373a097fb5b2dcc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 21 Jul 2015 14:31:27 +0200 Subject: [PATCH 11/26] perf stat: Move counter processing code into stat object Moving counter processing code into stat object as perf_stat__process_counter. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1437481927-29538-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 141 +------------------------------------- tools/perf/util/stat.c | 139 +++++++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 3 + 3 files changed, 143 insertions(+), 140 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5a781718c09f..a054ddc0b2a0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -179,145 +179,6 @@ static inline int nsec_counter(struct perf_evsel *evsel) return 0; } -static void zero_per_pkg(struct perf_evsel *counter) -{ - if (counter->per_pkg_mask) - memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); -} - -static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) -{ - unsigned long *mask = counter->per_pkg_mask; - struct cpu_map *cpus = perf_evsel__cpus(counter); - int s; - - *skip = false; - - if (!counter->per_pkg) - return 0; - - if (cpu_map__empty(cpus)) - return 0; - - if (!mask) { - mask = zalloc(MAX_NR_CPUS); - if (!mask) - return -ENOMEM; - - counter->per_pkg_mask = mask; - } - - s = cpu_map__get_socket(cpus, cpu); - if (s < 0) - return -1; - - *skip = test_and_set_bit(s, mask) == 1; - return 0; -} - -static int -process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel, - int cpu, int thread, - struct perf_counts_values *count) -{ - struct perf_counts_values *aggr = &evsel->counts->aggr; - static struct perf_counts_values zero; - bool skip = false; - - if (check_per_pkg(evsel, cpu, &skip)) { - pr_err("failed to read per-pkg counter\n"); - return -1; - } - - if (skip) - count = &zero; - - switch (config->aggr_mode) { - case AGGR_THREAD: - case AGGR_CORE: - case AGGR_SOCKET: - case AGGR_NONE: - if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, thread, count); - perf_counts_values__scale(count, config->scale, NULL); - if (config->aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->values, cpu); - break; - case AGGR_GLOBAL: - aggr->val += count->val; - if (config->scale) { - aggr->ena += count->ena; - aggr->run += count->run; - } - default: - break; - } - - return 0; -} - -static int process_counter_maps(struct 
perf_stat_config *config, - struct perf_evsel *counter) -{ - int nthreads = thread_map__nr(counter->threads); - int ncpus = perf_evsel__nr_cpus(counter); - int cpu, thread; - - if (counter->system_wide) - nthreads = 1; - - for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { - if (process_counter_values(config, counter, cpu, thread, - perf_counts(counter->counts, cpu, thread))) - return -1; - } - } - - return 0; -} - -static int process_counter(struct perf_stat_config *config, - struct perf_evsel *counter) -{ - struct perf_counts_values *aggr = &counter->counts->aggr; - struct perf_stat *ps = counter->priv; - u64 *count = counter->counts->aggr.values; - int i, ret; - - aggr->val = aggr->ena = aggr->run = 0; - init_stats(ps->res_stats); - - if (counter->per_pkg) - zero_per_pkg(counter); - - ret = process_counter_maps(&stat_config, counter); - if (ret) - return ret; - - if (config->aggr_mode != AGGR_GLOBAL) - return 0; - - if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, -1, aggr); - perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); - - for (i = 0; i < 3; i++) - update_stats(&ps->res_stats[i], count[i]); - - if (verbose) { - fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), count[0], count[1], count[2]); - } - - /* - * Save the full runtime - to allow normalization during printout: - */ - perf_stat__update_shadow_stats(counter, count, 0); - - return 0; -} - /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode @@ -355,7 +216,7 @@ static void read_counters(bool close_counters) if (read_counter(counter)) pr_warning("failed to read counter %s\n", counter->name); - if (process_counter(&stat_config, counter)) + if (perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); if (close_counters) { diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index f2a0d1521e26..c5c709cdc3ce 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -238,3 +238,142 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist) perf_evsel__reset_counts(evsel); } } + +static void zero_per_pkg(struct perf_evsel *counter) +{ + if (counter->per_pkg_mask) + memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); +} + +static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) +{ + unsigned long *mask = counter->per_pkg_mask; + struct cpu_map *cpus = perf_evsel__cpus(counter); + int s; + + *skip = false; + + if (!counter->per_pkg) + return 0; + + if (cpu_map__empty(cpus)) + return 0; + + if (!mask) { + mask = zalloc(MAX_NR_CPUS); + if (!mask) + return -ENOMEM; + + counter->per_pkg_mask = mask; + } + + s = cpu_map__get_socket(cpus, cpu); + if (s < 0) + return -1; + + *skip = test_and_set_bit(s, mask) == 1; + return 0; +} + +static int +process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel, + int cpu, int thread, + struct perf_counts_values *count) +{ + struct perf_counts_values *aggr = &evsel->counts->aggr; + static struct perf_counts_values zero; + bool skip = false; + + if (check_per_pkg(evsel, cpu, &skip)) { + pr_err("failed to read per-pkg counter\n"); + return -1; + } + + if (skip) + count = &zero; + + switch (config->aggr_mode) { + case AGGR_THREAD: + case AGGR_CORE: + case AGGR_SOCKET: + case AGGR_NONE: + if (!evsel->snapshot) + perf_evsel__compute_deltas(evsel, cpu, thread, count); + perf_counts_values__scale(count, config->scale, NULL); + 
if (config->aggr_mode == AGGR_NONE) + perf_stat__update_shadow_stats(evsel, count->values, cpu); + break; + case AGGR_GLOBAL: + aggr->val += count->val; + if (config->scale) { + aggr->ena += count->ena; + aggr->run += count->run; + } + default: + break; + } + + return 0; +} + +static int process_counter_maps(struct perf_stat_config *config, + struct perf_evsel *counter) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = perf_evsel__nr_cpus(counter); + int cpu, thread; + + if (counter->system_wide) + nthreads = 1; + + for (thread = 0; thread < nthreads; thread++) { + for (cpu = 0; cpu < ncpus; cpu++) { + if (process_counter_values(config, counter, cpu, thread, + perf_counts(counter->counts, cpu, thread))) + return -1; + } + } + + return 0; +} + +int perf_stat_process_counter(struct perf_stat_config *config, + struct perf_evsel *counter) +{ + struct perf_counts_values *aggr = &counter->counts->aggr; + struct perf_stat *ps = counter->priv; + u64 *count = counter->counts->aggr.values; + int i, ret; + + aggr->val = aggr->ena = aggr->run = 0; + init_stats(ps->res_stats); + + if (counter->per_pkg) + zero_per_pkg(counter); + + ret = process_counter_maps(config, counter); + if (ret) + return ret; + + if (config->aggr_mode != AGGR_GLOBAL) + return 0; + + if (!counter->snapshot) + perf_evsel__compute_deltas(counter, -1, -1, aggr); + perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); + + for (i = 0; i < 3; i++) + update_stats(&ps->res_stats[i], count[i]); + + if (verbose) { + fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + perf_evsel__name(counter), count[0], count[1], count[2]); + } + + /* + * Save the full runtime - to allow normalization during printout: + */ + perf_stat__update_shadow_stats(counter, count, 0); + + return 0; +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 1da706d848fb..0b897b083682 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -116,4 +116,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); void perf_evlist__reset_stats(struct perf_evlist *evlist); + +int perf_stat_process_counter(struct perf_stat_config *config, + struct perf_evsel *counter); #endif From 93df8a1ed6231727c5db94a80b1a6bd5ee67cec3 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 4 Aug 2015 17:10:27 +0100 Subject: [PATCH 12/26] perf tools: Add empty Build files for architectures lacking them perf currently fails to build on MIPS as there is no tools/perf/arch/mips/Build file. Adding an empty file fixes this as there are no MIPS-specific sources to build. It looks like the same is needed for Alpha and PA-RISC, though I haven't been able to test those. 
Signed-off-by: Ben Hutchings
Fixes: 5e8c0fb6a957 ("perf build: Add arch x86 objects building")
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1438704627.7315.2.camel@decadent.org.uk
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/arch/alpha/Build | 1 +
 tools/perf/arch/mips/Build | 1 +
 tools/perf/arch/parisc/Build | 1 +
 3 files changed, 3 insertions(+)
 create mode 100644 tools/perf/arch/alpha/Build
 create mode 100644 tools/perf/arch/mips/Build
 create mode 100644 tools/perf/arch/parisc/Build

diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/alpha/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/mips/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/parisc/Build
@@ -0,0 +1 @@
+# empty

From 0e332f033a8216fa03792fde69882f66500848c7 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Sat, 18 Jul 2015 08:24:46 -0700
Subject: [PATCH 13/26] perf tools: Add support for cycles, weight branch_info field

cycles is a new branch_info field available on some CPUs that indicates
the time deltas between branches in the LBR.

Add a sort key and output code for the cycles to allow displaying the
basic block cycles individually in perf report.

We also pass in the cycles for weight when LBRs are processed, which
allows computing global and local weight, to get an estimate of the
total cost.

Also print the cycles information for 'perf report -D'. I also added
printing for the previously missing LBR flags (mispredict etc.).

Signed-off-by: Andi Kleen
Acked-by: Jiri Olsa
Cc: Namhyung Kim
Link: http://lkml.kernel.org/r/1437233094-12844-2-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/Documentation/perf-report.txt | 1 +
 tools/perf/util/event.h | 3 ++-
 tools/perf/util/hist.c | 3 ++-
 tools/perf/util/hist.h | 1 +
 tools/perf/util/session.c | 16 ++++++++++++----
 tools/perf/util/sort.c | 24 ++++++++++++++++++++++++
 tools/perf/util/sort.h | 1 +
 7 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index c33b69f3374f..960da203ec11 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -109,6 +109,7 @@ OPTIONS
 	- mispredict: "N" for predicted branch, "Y" for mispredicted branch
 	- in_tx: branch in TSX transaction
 	- abort: TSX transaction abort.
+	- cycles: Cycles in basic block

 	And default sort keys are changed to comm, dso_from, symbol_from, dso_to
 	and symbol_to, see '--branch-stack'.
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 4bb2ae894c78..f729df5e25e6 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -134,7 +134,8 @@ struct branch_flags {
 	u64 predicted:1;
 	u64 in_tx:1;
 	u64 abort:1;
-	u64 reserved:60;
+	u64 cycles:16;
+	u64 reserved:44;
 };

 struct branch_entry {
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6f28d53d4e46..54fc0033dd6a 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -618,7 +618,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
 	 * and not events sampled. Thus we use a pseudo period of 1.
*/ he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, - 1, 1, 0, true); + 1, bi->flags.cycles ? bi->flags.cycles : 1, + 0, true); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5ed8d9c22981..3881d9815309 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -47,6 +47,7 @@ enum hist_column { HISTC_MEM_SNOOP, HISTC_MEM_DCACHELINE, HISTC_TRANSACTION, + HISTC_CYCLES, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f51eb54aeeb3..18722e774a69 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -784,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample) printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); - for (i = 0; i < sample->branch_stack->nr; i++) - printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", - i, sample->branch_stack->entries[i].from, - sample->branch_stack->entries[i].to); + for (i = 0; i < sample->branch_stack->nr; i++) { + struct branch_entry *e = &sample->branch_stack->entries[i]; + + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", + i, e->from, e->to, + e->flags.cycles, + e->flags.mispred ? "M" : " ", + e->flags.predicted ? "P" : " ", + e->flags.abort ? "A" : " ", + e->flags.in_tx ? "T" : " ", + (unsigned)e->flags.reserved); + } } static void regs_dump__printf(u64 mask, u64 *regs) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 4c65a143a34c..5b7a50c04e45 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -526,6 +526,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, out); } +static int64_t +sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->branch_info->flags.cycles - + right->branch_info->flags.cycles; +} + +static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + if (he->branch_info->flags.cycles == 0) + return repsep_snprintf(bf, size, "%-*s", width, "-"); + return repsep_snprintf(bf, size, "%-*hd", width, + he->branch_info->flags.cycles); +} + +struct sort_entry sort_cycles = { + .se_header = "Basic Block Cycles", + .se_cmp = sort__cycles_cmp, + .se_snprintf = hist_entry__cycles_snprintf, + .se_width_idx = HISTC_CYCLES, +}; + /* --sort daddr_sym */ static int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) @@ -1190,6 +1213,7 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), DIM(SORT_IN_TX, "in_tx", sort_in_tx), DIM(SORT_ABORT, "abort", sort_abort), + DIM(SORT_CYCLES, "cycles", sort_cycles), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e97cd476d336..bc6c87a76d16 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -185,6 +185,7 @@ enum sort_type { SORT_MISPREDICT, SORT_ABORT, SORT_IN_TX, + SORT_CYCLES, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, From 98df858ed46ddaaf9be3573eb2b63b57a68c6af7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:47 -0700 Subject: [PATCH 14/26] perf report: Add flag for non ANY branch mode Later patches need to cheaply check that the branch mode is in ANY. Add a new function to check all event attrs and add a flag to the report state, which is then initialized. 
v2: Rename flag Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 7 +++++++ tools/perf/util/evlist.c | 10 ++++++++++ tools/perf/util/evlist.h | 1 + 3 files changed, 18 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 95a47719aec3..3ba0e9737dc5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,7 @@ struct report { bool mem_mode; bool header; bool header_only; + bool nonany_branch_mode; int max_stack; struct perf_read_values show_threads_values; const char *pretty_printing_style; @@ -258,6 +259,12 @@ static int report__setup_sample_type(struct report *rep) else callchain_param.record_mode = CALLCHAIN_FP; } + + /* ??? handle more cases than just ANY? */ + if (!(perf_evlist__combined_branch_type(session->evlist) & + PERF_SAMPLE_BRANCH_ANY)) + rep->nonany_branch_mode = true; + return 0; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3b9f411a6b46..373f65b02545 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1273,6 +1273,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) return __perf_evlist__combined_sample_type(evlist); } +u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + u64 branch_type = 0; + + evlist__for_each(evlist, evsel) + branch_type |= evsel->attr.branch_sample_type; + return branch_type; +} + bool perf_evlist__valid_read_format(struct perf_evlist *evlist) { struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a8930b68456b..397757063da1 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -165,6 +165,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist); u64 perf_evlist__read_format(struct perf_evlist *evlist); u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist); u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); +u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist); bool perf_evlist__sample_id_all(struct perf_evlist *evlist); u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); From d4957633bf9dab70e566e7dbb2b8d0c61c3a2f1e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:48 -0700 Subject: [PATCH 15/26] perf report: Add infrastructure for a cycles histogram This adds the basic infrastructure to keep track of cycle counts per basic block for annotate. We allocate an array similar to the normal accounting, and then account branch cycles there. We handle two cases: cycles per basic block with start and cycles per branch (these are later used for either IPC or just cycles per BB) In the start case we cannot handle overlaps, so always the longest basic block wins. For the cycles per branch case everything is accurately accounted. v2: Remove unnecessary checks. Slight restructure. Move symbol__get_annotation to another patch. Move histogram allocation. 
v3: Merged with current tree Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 1 + tools/perf/util/annotate.c | 127 +++++++++++++++++++++++++++++++++- tools/perf/util/annotate.h | 17 +++++ 3 files changed, 142 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2c1bec39c30e..467a23b14e2f 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -187,6 +187,7 @@ static void hists__find_annotations(struct hists *hists, * symbol, free he->ms.sym->src to signal we already * processed this symbol. */ + zfree(¬es->src->cycles_hist); zfree(¬es->src); } } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 03b7bc70eb66..e0b614648044 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym) return 0; } +/* The cycles histogram is lazily allocated. */ +static int symbol__alloc_hist_cycles(struct symbol *sym) +{ + struct annotation *notes = symbol__annotation(sym); + const size_t size = symbol__size(sym); + + notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); + if (notes->src->cycles_hist == NULL) + return -1; + return 0; +} + void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); pthread_mutex_lock(¬es->lock); - if (notes->src != NULL) + if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); + if (notes->src->cycles_hist) + memset(notes->src->cycles_hist, 0, + symbol__size(sym) * sizeof(struct cyc_hist)); + } pthread_mutex_unlock(¬es->lock); } +static int __symbol__account_cycles(struct annotation *notes, + u64 start, + unsigned offset, unsigned cycles, + unsigned have_start) +{ + struct cyc_hist *ch; + + ch = notes->src->cycles_hist; + /* + * For now we can only account one basic block per + * final jump. But multiple could be overlapping. + * Always account the longest one. So when + * a shorter one has been already seen throw it away. + * + * We separately always account the full cycles. 
+ */ + ch[offset].num_aggr++; + ch[offset].cycles_aggr += cycles; + + if (!have_start && ch[offset].have_start) + return 0; + if (ch[offset].num) { + if (have_start && (!ch[offset].have_start || + ch[offset].start > start)) { + ch[offset].have_start = 0; + ch[offset].cycles = 0; + ch[offset].num = 0; + if (ch[offset].reset < 0xffff) + ch[offset].reset++; + } else if (have_start && + ch[offset].start < start) + return 0; + } + ch[offset].have_start = have_start; + ch[offset].start = start; + ch[offset].cycles += cycles; + ch[offset].num++; + return 0; +} + static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, struct annotation *notes, int evidx, u64 addr) { @@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, return 0; } -static struct annotation *symbol__get_annotation(struct symbol *sym) +static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles) { struct annotation *notes = symbol__annotation(sym); @@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym) if (symbol__alloc_hist(sym) < 0) return NULL; } + if (!notes->src->cycles_hist && cycles) { + if (symbol__alloc_hist_cycles(sym) < 0) + return NULL; + } return notes; } @@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, if (sym == NULL) return 0; - notes = symbol__get_annotation(sym); + notes = symbol__get_annotation(sym, false); if (notes == NULL) return -ENOMEM; return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); } +static int symbol__account_cycles(u64 addr, u64 start, + struct symbol *sym, unsigned cycles) +{ + struct annotation *notes; + unsigned offset; + + if (sym == NULL) + return 0; + notes = symbol__get_annotation(sym, true); + if (notes == NULL) + return -ENOMEM; + if (addr < sym->start || addr >= sym->end) + return -ERANGE; + + if (start) { + if (start < sym->start || start >= sym->end) + return -ERANGE; + if (start >= addr) + start = 0; + } + offset = addr - sym->start; + return __symbol__account_cycles(notes, + start ? start - sym->start : 0, + offset, cycles, + !!start); +} + +int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, + struct addr_map_symbol *start, + unsigned cycles) +{ + unsigned long saddr = 0; + int err; + + if (!cycles) + return 0; + + /* + * Only set start when IPC can be computed. We can only + * compute it when the basic block is completely in a single + * function. + * Special case the case when the jump is elsewhere, but + * it starts on the function start. + */ + if (start && + (start->sym == ams->sym || + (ams->sym && + start->addr == ams->sym->start + ams->map->start))) + saddr = start->al_addr; + if (saddr == 0) + pr_debug2("BB with bad start: addr %lx start %lx sym %lx saddr %lx\n", + ams->addr, + start ? start->addr : 0, + ams->sym ? 
ams->sym->start + ams->map->start : 0,
+			  saddr);
+	err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
+	if (err)
+		pr_debug2("account_cycles failed %d\n", err);
+	return err;
+}
+
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
{
	return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr);

diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 7e78e6c27078..a06518dca4b7 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -79,6 +79,17 @@ struct sym_hist {
	u64 addr[0];
};

+struct cyc_hist {
+	u64 start;
+	u64 cycles;
+	u64 cycles_aggr;
+	u32 num;
+	u32 num_aggr;
+	u8 have_start;
+	/* 1 byte padding */
+	u16 reset;
+};
+
struct source_line_samples {
	double percent;
	double percent_sum;
@@ -97,6 +108,7 @@ struct source_line {
 * @histogram: Array of addr hit histograms per event being monitored
 * @lines: If 'print_lines' is specified, per source code line percentages
 * @source: source parsed from a disassembler like objdump -dS
+ * @cyc_hist: Average cycles per basic block
 *
 * lines is allocated, percentages calculated and all sorted by percentage
 * when the annotation is about to be presented, so the percentages are for
@@ -109,6 +121,7 @@ struct annotated_source {
	struct source_line *lines;
	int nr_histograms;
	int sizeof_sym_hist;
+	struct cyc_hist *cycles_hist;
	struct sym_hist histograms[0];
};
@@ -130,6 +143,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)

int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx);

+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+				    struct addr_map_symbol *start,
+				    unsigned cycles);
+
int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);

int symbol__alloc_hist(struct symbol *sym);

From 57849998e2cd24d50295076a1bbd2f029e2d7c38 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Sat, 18 Jul 2015 08:24:49 -0700
Subject: [PATCH 16/26] perf report: Add processing for cycle histograms

Call the previously added cycle histogram infrastructure from the perf report hist iter callback. For this we walk the branch records.

This allows using cycle histograms when browsing perf report annotate.
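For illustration, consider a hypothetical three-entry branch stack: perf stores branch records in reverse program order (newest first), so the callback walks entries [2], [1], [0] and accounts each branch's cycle count at its 'from' address, passing the previous entry's 'to' address as the start of the enclosing basic block. In non-ANY branch modes the start is always forced to NULL, so no IPC is computed for potentially discontinuous blocks.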
v2: Rename flag Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-5-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ tools/perf/util/hist.c | 33 +++++++++++++++++++++++++++++++++ tools/perf/util/hist.h | 3 +++ 3 files changed, 39 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3ba0e9737dc5..3a9d1b659fcd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -103,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, if (!ui__has_annotation()) return 0; + hist__account_cycles(iter->sample->branch_stack, al, iter->sample, + rep->nonany_branch_mode); + if (sort__mode == SORT_MODE__BRANCH) { bi = he->branch_info; err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 54fc0033dd6a..a6e9ddd37913 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1415,6 +1415,39 @@ int hists__link(struct hists *leader, struct hists *other) return 0; } +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode) +{ + struct branch_info *bi; + + /* If we have branch cycles always annotate them. */ + if (bs && bs->nr && bs->entries[0].flags.cycles) { + int i; + + bi = sample__resolve_bstack(sample, al); + if (bi) { + struct addr_map_symbol *prev = NULL; + + /* + * Ignore errors, still want to process the + * other entries. + * + * For non standard branch modes always + * force no IPC (prev == NULL) + * + * Note that perf stores branches reversed from + * program order! + */ + for (i = bs->nr - 1; i >= 0; i--) { + addr_map_symbol__account_cycles(&bi[i].from, + nonany_branch_mode ? NULL : prev, + bi[i].flags.cycles); + prev = &bi[i].to; + } + free(bi); + } + } +} size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3881d9815309..e2f712f85d2e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -350,6 +350,9 @@ static inline int script_browse(const char *script_opt __maybe_unused) unsigned int hists__sort_list_width(struct hists *hists); +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode); + struct option; int parse_filter_percentage(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused); From 30e863bb6f708c0abd422fbb0e6b295f5ee6407b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:50 -0700 Subject: [PATCH 17/26] perf annotate: Compute IPC and basic block cycles Compute the IPC and the basic block cycles for the annotate display. IPC is computed by counting the instructions, and then dividing the accounted cycles by that count. The actual IPC computation can only be done at annotate time, because we need to parse the objdump output first to know the number of instructions in the basic block. The cycles/IPC are also put into the perf function annotation so that the display code can show them. Again basic block overlaps are not handled, with the longest winning, but there are some heuristics to hide the IPC when the longest is not the most common. v2: Compute IPC correctly. 
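Worked example (made-up numbers, mirroring the count_and_fill() logic added below): a basic block spanning 4 instructions that was accounted 2 times for a total of 8 cycles averages 8 / 2 = 4 cycles per traversal, giving IPC = 4 / 4 = 1.0.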
Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-6-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 73 ++++++++++++++++++++++++++++++- tools/perf/util/annotate.h | 2 + 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5995a8bd7c69..6ec179547f72 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -53,6 +53,7 @@ struct annotate_browser { int max_jump_sources; int nr_jumps; bool searching_backwards; + bool have_cycles; u8 addr_width; u8 jumps_width; u8 target_width; @@ -390,7 +391,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, max_percent = bpos->samples[i].percent; } - if (max_percent < 0.01) { + if (max_percent < 0.01 && pos->ipc == 0) { RB_CLEAR_NODE(&bpos->rb_node); continue; } @@ -869,6 +870,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, return map_symbol__tui_annotate(&he->ms, evsel, hbt); } + +static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end) +{ + unsigned n_insn = 0; + u64 offset; + + for (offset = start; offset <= end; offset++) { + if (browser->offsets[offset]) + n_insn++; + } + return n_insn; +} + +static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, + struct cyc_hist *ch) +{ + unsigned n_insn; + u64 offset; + + n_insn = count_insn(browser, start, end); + if (n_insn && ch->num && ch->cycles) { + float ipc = n_insn / ((double)ch->cycles / (double)ch->num); + + /* Hide data when there are too many overlaps. */ + if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2) + return; + + for (offset = start; offset <= end; offset++) { + struct disasm_line *dl = browser->offsets[offset]; + + if (dl) + dl->ipc = ipc; + } + } +} + +/* + * This should probably be in util/annotate.c to share with the tty + * annotate, but right now we need the per byte offsets arrays, + * which are only here. 
+ */
+static void annotate__compute_ipc(struct annotate_browser *browser, size_t size,
+				  struct symbol *sym)
+{
+	u64 offset;
+	struct annotation *notes = symbol__annotation(sym);
+
+	if (!notes->src || !notes->src->cycles_hist)
+		return;
+
+	pthread_mutex_lock(&notes->lock);
+	for (offset = 0; offset < size; ++offset) {
+		struct cyc_hist *ch;
+
+		ch = &notes->src->cycles_hist[offset];
+		if (ch && ch->cycles) {
+			struct disasm_line *dl;
+
+			if (ch->have_start)
+				count_and_fill(browser, ch->start, offset, ch);
+			dl = browser->offsets[offset];
+			if (dl && ch->num_aggr)
+				dl->cycles = ch->cycles_aggr / ch->num_aggr;
+			browser->have_cycles = true;
+		}
+	}
+	pthread_mutex_unlock(&notes->lock);
+}
+
static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
						size_t size)
{
@@ -991,6 +1061,7 @@
	}

	annotate_browser__mark_jump_targets(&browser, size);
+	annotate__compute_ipc(&browser, size, sym);

	browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size);
	browser.max_addr_width = hex_width(sym->end);

diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index a06518dca4b7..e9996092a093 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -59,6 +59,8 @@ struct disasm_line {
	char *name;
	struct ins *ins;
	int line_nr;
+	float ipc;
+	u64 cycles;
	struct ins_operands ops;
};

From f8f4aaead579c947fb8fc051c9d242037025caf3 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Sat, 18 Jul 2015 08:24:51 -0700
Subject: [PATCH 18/26] perf annotate: Finally display IPC and cycle accounting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add two new columns to the annotate display and display the average cycles and the computed IPC if available.

When the LBR was not in any branch mode, the IPC computation is automatically disabled. We still display the cycle information.

Example output (with made up numbers); the second column is the IPC, the third the average cycles:

            │    __attribute__((noinline)) f2()
            │    {
 5.15  0.07 │      push %rbp
 0.01  0.07 │      mov  %rsp,%rbp
            │      c = a / b;
 9.87  0.07 │      mov  a,%eax
       0.07 │      mov  b,%ecx
       0.07 │      cltd
 4.92  0.07  123│      idiv %ecx
70.79  0.07 │      mov  %eax,__TMC_END__
            │    }
 9.25  0.07 │      pop  %rbp
 0.01  0.07  123│      ← retq

v2: Fix display problems.
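For reference, one plausible way to produce such output (assuming hardware whose LBR records per-branch cycles, e.g. recent Intel CPUs) would be something like: perf record -b <workload>, then perf report, then annotating a hot symbol in the browser.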
Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-7-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 57 ++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 6ec179547f72..b5fc847f9660 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -16,6 +16,9 @@ struct disasm_line_samples { u64 nr; }; +#define IPC_WIDTH 6 +#define CYCLES_WIDTH 6 + struct browser_disasm_line { struct rb_node rb_node; u32 idx; @@ -97,6 +100,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br return ui_browser__set_color(&browser->b, color); } +static int annotate_browser__pcnt_width(struct annotate_browser *ab) +{ + int w = 7 * ab->nr_events; + + if (ab->have_cycles) + w += IPC_WIDTH + CYCLES_WIDTH; + return w; +} + static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); @@ -107,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int (!current_entry || (browser->use_navkeypressed && !browser->navkeypressed))); int width = browser->width, printed; - int i, pcnt_width = 7 * ab->nr_events; + int i, pcnt_width = annotate_browser__pcnt_width(ab); double percent_max = 0.0; char bf[256]; @@ -117,19 +129,34 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } if (dl->offset != -1 && percent_max != 0.0) { - for (i = 0; i < ab->nr_events; i++) { - ui_browser__set_percent_color(browser, - bdl->samples[i].percent, - current_entry); - if (annotate_browser__opts.show_total_period) - slsmg_printf("%6" PRIu64 " ", - bdl->samples[i].nr); - else - slsmg_printf("%6.2f ", bdl->samples[i].percent); + if (percent_max != 0.0) { + for (i = 0; i < ab->nr_events; i++) { + ui_browser__set_percent_color(browser, + bdl->samples[i].percent, + current_entry); + if (annotate_browser__opts.show_total_period) + slsmg_printf("%6" PRIu64 " ", + bdl->samples[i].nr); + else + slsmg_printf("%6.2f ", bdl->samples[i].percent); + } + } else { + slsmg_write_nstring(" ", 7 * ab->nr_events); } } else { ui_browser__set_percent_color(browser, 0, current_entry); - slsmg_write_nstring(" ", pcnt_width); + slsmg_write_nstring(" ", 7 * ab->nr_events); + } + if (ab->have_cycles) { + if (dl->ipc) + slsmg_printf("%*.2f ", IPC_WIDTH - 1, dl->ipc); + else + slsmg_write_nstring(" ", IPC_WIDTH); + if (dl->cycles) + slsmg_printf("%*" PRIu64 " ", + CYCLES_WIDTH - 1, dl->cycles); + else + slsmg_write_nstring(" ", CYCLES_WIDTH); } SLsmg_write_char(' '); @@ -232,7 +259,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) unsigned int from, to; struct map_symbol *ms = ab->b.priv; struct symbol *sym = ms->sym; - u8 pcnt_width = 7; + u8 pcnt_width = annotate_browser__pcnt_width(ab); /* PLT symbols contain external offsets */ if (strstr(sym->name, "@plt")) @@ -256,8 +283,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) to = (u64)btarget->idx; } - pcnt_width *= ab->nr_events; - ui_browser__set_color(browser, HE_COLORSET_CODE); __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, from, to); @@ -267,9 +292,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) { struct annotate_browser *ab = 
container_of(browser, struct annotate_browser, b); int ret = ui_browser__list_head_refresh(browser); - int pcnt_width; - - pcnt_width = 7 * ab->nr_events; + int pcnt_width = annotate_browser__pcnt_width(ab); if (annotate_browser__opts.jump_arrows) annotate_browser__draw_current_jump(browser); From a18b027efe1a2a502d98a8d0ea0391a72bf3f696 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:52 -0700 Subject: [PATCH 19/26] perf top: Add branch annotation code to top Now that we can process branch data in annotate it makes sense to support enabling branch recording from top too. Most of the code needed for this is already in shared code with report. But we need to add: - The option parsing code (using shared code from the previous patch) - Document the options - Set up the IPC/cycles accounting state in the top session - Call the accounting code in the hist iter callback Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-8-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 21 +++++++++++++++++++++ tools/perf/builtin-top.c | 9 +++++++++ 2 files changed, 30 insertions(+) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 776aec4d0927..f6a23eb294e7 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -208,6 +208,27 @@ Default is to monitor all CPUS. This option sets the time out limit. The default value is 500 ms. +-b:: +--branch-any:: + Enable taken branch stack sampling. Any type of taken branch may be sampled. + This is a shortcut for --branch-filter any. See --branch-filter for more infos. + +-j:: +--branch-filter:: + Enable taken branch stack sampling. Each sample captures a series of consecutive + taken branches. The number of branches captured with each sample depends on the + underlying hardware, the type of branches of interest, and the executed code. + It is possible to select the types of branches captured by enabling filters. + For a full list of modifiers please see the perf record manpage. + + The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. + The privilege levels may be omitted, in which case, the privilege levels of the associated + event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege + levels are subject to permissions. When sampling on multiple events, branch stack sampling + is enabled for all the sampling events. The sampled branch type is the same for all events. + The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k + Note that this feature may not be available on all processors. 
+ INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ecf319728f25..bfe24f1e362f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -40,6 +40,7 @@ #include "util/xyarray.h" #include "util/sort.h" #include "util/intlist.h" +#include "util/parse-branch-options.h" #include "arch/common.h" #include "util/debug.h" @@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, perf_top__record_precise_ip(top, he, evsel->idx, ip); } + hist__account_cycles(iter->sample->branch_stack, al, iter->sample, + !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY)); return 0; } @@ -1171,6 +1174,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "don't try to adjust column width, use these fixed values"), OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, "per thread proc mmap processing timeout in ms"), + OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack, + "branch any", "sample any taken branches", + parse_branch_stack), + OPT_CALLBACK('j', "branch-filter", &opts->branch_stack, + "branch filter mask", "branch stack filter modes", + parse_branch_stack), OPT_END() }; const char * const top_usage[] = { From 40997d6cf9fc40c85dba479e162a89e7530eb360 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:53 -0700 Subject: [PATCH 20/26] perf report: Display cycles in branch sort mode Display the cycles by default in branch sort mode. To make enough room for the new column I removed dso_to. It is usually redundant with dso_from. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-9-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5b7a50c04e45..5177088a71d3 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -9,7 +9,7 @@ regex_t parent_regex; const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char default_sort_order[] = "comm,dso,symbol"; -const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to"; +const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; From 74d4582f430a797564f92fbff0bd3a21945528b7 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 18 Jul 2015 11:30:11 +0300 Subject: [PATCH 21/26] perf tools xtensa: Add DWARF register names Signed-off-by: Max Filippov Cc: Chris Zankel Cc: Marc Gauthier Cc: Paul Mackerras Cc: Peter Zijlstra Cc: linux-xtensa@linux-xtensa.org Link: http://lkml.kernel.org/r/1437208216-15729-9-git-send-email-jcmvbkbc@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/xtensa/Build | 1 + tools/perf/arch/xtensa/Makefile | 3 +++ tools/perf/arch/xtensa/util/Build | 1 + tools/perf/arch/xtensa/util/dwarf-regs.c | 25 ++++++++++++++++++++++++ 4 files changed, 30 insertions(+) create mode 100644 tools/perf/arch/xtensa/Build create mode 100644 tools/perf/arch/xtensa/Makefile create mode 100644 tools/perf/arch/xtensa/util/Build create mode 100644 tools/perf/arch/xtensa/util/dwarf-regs.c diff --git 
a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/xtensa/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile new file mode 100644 index 000000000000..7fbca175099e --- /dev/null +++ b/tools/perf/arch/xtensa/Makefile @@ -0,0 +1,3 @@ +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/xtensa/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c new file mode 100644 index 000000000000..4dba76bfb4ce --- /dev/null +++ b/tools/perf/arch/xtensa/util/dwarf-regs.c @@ -0,0 +1,25 @@ +/* + * Mapping of DWARF debug register numbers into register names. + * + * Copyright (c) 2015 Cadence Design Systems Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +#define XTENSA_MAX_REGS 16 + +const char *xtensa_regs_table[XTENSA_MAX_REGS] = { + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", +}; + +const char *get_arch_regstr(unsigned int n) +{ + return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL; +} From f70cfa07e3675a115265e32d6357272275358cdb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:46 +0300 Subject: [PATCH 22/26] perf auxtrace: Fix period type 'i' not working PERF_ITRACE_PERIOD_INSTRUCTIONS is zero so it got overwritten by the default period type. Fix by checking if the period type was set rather than if the value was zero when applying the default. 
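For example, with a hypothetical invocation like: perf script --itrace=i100i the user asks for synthesized instruction events with a period of 100 instructions; before this fix the instructions period type (value zero) looked unset and was silently replaced by the default period type.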
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-12-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 83d9dd96fe08..a25b3609cef8 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -942,6 +942,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, struct itrace_synth_opts *synth_opts = opt->value; const char *p; char *endptr; + bool period_type_set = false; synth_opts->set = true; @@ -970,10 +971,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'i': synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; + period_type_set = true; break; case 't': synth_opts->period_type = PERF_ITRACE_PERIOD_TICKS; + period_type_set = true; break; case 'm': synth_opts->period *= 1000; @@ -986,6 +989,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, goto out_err; synth_opts->period_type = PERF_ITRACE_PERIOD_NANOSECS; + period_type_set = true; break; case '\0': goto out; @@ -1039,7 +1043,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, } out: if (synth_opts->instructions) { - if (!synth_opts->period_type) + if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; if (!synth_opts->period) From 8bd1b2d2578ca2688969352ed1f8a0a8f10dbb63 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:47 +0300 Subject: [PATCH 23/26] perf tools: Fix perf-with-kcore handling of arguments containing spaces Fix the perf-with-kcore script so that it doesn't split arguments that contain spaces. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-13-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-with-kcore.sh | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh index c7ff90a90e4e..7e47a7cbc195 100644 --- a/tools/perf/perf-with-kcore.sh +++ b/tools/perf/perf-with-kcore.sh @@ -50,7 +50,7 @@ copy_kcore() fi rm -f perf.data.junk - ("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null & + ("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null & PERF_PID=$! 
# Need to make sure that perf has started @@ -160,18 +160,18 @@ record() echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2 fi - if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2 fi - if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2 fi - if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then true - elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then + elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then true elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2 @@ -193,8 +193,8 @@ record() mkdir "$PERF_DATA_DIR" - echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*" - "$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true + echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@" + "$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then exit 1 @@ -209,8 +209,8 @@ subcommand() { find_perf check_buildid_cache_permissions - echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*" - "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $* + echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@" + "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@" } if [ "$1" = "fix_buildid_cache_permissions" ] ; then @@ -234,7 +234,7 @@ fi case "$PERF_SUB_COMMAND" in "record") while [ "$1" != "--" ] ; do - PERF_OPTIONS+="$1 " + PERF_OPTIONS+=("$1") shift || break done if [ "$1" != "--" ] ; then @@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in usage fi shift - record $* + record "$@" ;; "script") - subcommand $* + subcommand "$@" ;; "report") - subcommand $* + subcommand "$@" ;; "inject") - subcommand $* + subcommand "$@" ;; *) usage From 09ff607176ab2bf7e038150100fdf9290a6fbe47 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:49 +0300 Subject: [PATCH 24/26] perf tools: Add perf_pmu__format_bits() Add perf_pmu__format_bits() to get the format bits for a PMU config term. Intel PT will use this to validate terms and to record format bits to enable later interpreting the config from the attribute stored in the perf.data file. 
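For illustration, given a hypothetical sysfs format definition 'config:0-7,32', perf_pmu__format_bits() would return 0x1000000ffULL, i.e. a mask with bits 0-7 and bit 32 set, independent of any value later placed in those bits.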
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-15-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 17 ++++++++++++++++- tools/perf/util/pmu.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b615cdf211d6..c548ec89c8bc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -542,7 +542,7 @@ struct perf_pmu *perf_pmu__find(const char *name) } static struct perf_pmu_format * -pmu_find_format(struct list_head *formats, char *name) +pmu_find_format(struct list_head *formats, const char *name) { struct perf_pmu_format *format; @@ -553,6 +553,21 @@ pmu_find_format(struct list_head *formats, char *name) return NULL; } +__u64 perf_pmu__format_bits(struct list_head *formats, const char *name) +{ + struct perf_pmu_format *format = pmu_find_format(formats, name); + __u64 bits = 0; + int fbit; + + if (!format) + return 0; + + for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS) + bits |= 1ULL << fbit; + + return bits; +} + /* * Sets value based on the format definition (format parameter) * and unformated value (value parameter). diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 7b9c8cf8ae3e..5d7e84466bee 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms, bool zero, struct parse_events_error *error); +__u64 perf_pmu__format_bits(struct list_head *formats, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, struct perf_pmu_info *info); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, From 0efe6b67690b6546daa0d2f34a17eb3ca46c9dea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:50 +0300 Subject: [PATCH 25/26] perf tools: Validate config term maximum value Currently the value of a PMU config term is silently truncated if it is too big. This is an impediment to validating the value for other criteria later on i.e. the user provides an invalid value that gets truncated to a valid one. The maximum value validation is only done for the parser where the error is passed back to the user. In other cases the silent truncation continues so as not to affect tools that perhaps rely on it. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-16-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c548ec89c8bc..d4b0e6454bc6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -589,6 +589,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, } } +static __u64 pmu_format_max_value(const unsigned long *format) +{ + int w; + + w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); + if (!w) + return 0; + if (w < 64) + return (1ULL << w) - 1; + return -1; +} + /* * Term is a string term, and might be a param-term. Try to look up it's value * in the remaining terms. 
@@ -662,7 +674,7 @@ static int pmu_config_term(struct list_head *formats,
{
	struct perf_pmu_format *format;
	__u64 *vp;
-	__u64 val;
+	__u64 val, max_val;

	/*
	 * If this is a parameter we've already used for parameterized-eval,
@@ -728,6 +740,22 @@ static int pmu_config_term(struct list_head *formats,
	} else
		return -EINVAL;

+	max_val = pmu_format_max_value(format->bits);
+	if (val > max_val) {
+		if (err) {
+			err->idx = term->err_val;
+			if (asprintf(&err->str,
+				     "value too big for format, maximum is %llu",
+				     (unsigned long long)max_val) < 0)
+				err->str = strdup("value too big for format");
+			return -EINVAL;
+		}
+		/*
+		 * Assume we don't care if !err, in which case the value will be
+		 * silently truncated.
+		 */
+	}
+
	pmu_format_value(format->bits, val, vp, zero);
	return 0;
}

From 141b2d3161f19a774b3ceaa8faed5e63484a4684 Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Fri, 17 Jul 2015 19:33:51 +0300
Subject: [PATCH 26/26] perf tools: Extend the event parser maximum error index

Extend the event parser maximum error index from 10 to 13. That allows PMU config terms of up to 10 characters to be displayed untruncated in the error message.

Signed-off-by: Adrian Hunter
Cc: Jiri Olsa
Link: http://lkml.kernel.org/r/1437150840-31811-17-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/parse-events.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index a6cb9afc20e2..828936dc3f1e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1168,7 +1168,7 @@ static void parse_events_print_error(struct parse_events_error *err,
	 * Maximum error index indent, we will cut
	 * the event string if it's bigger.
	 */
-	int max_err_idx = 10;
+	int max_err_idx = 13;

	/*
	 * Let's be specific with the message when