From 834fd46ddb50953cf3fd9caa3f35485715c62ea3 Mon Sep 17 00:00:00 2001
From: Milian Wolff
Date: Thu, 6 Aug 2015 11:24:29 +0200
Subject: [PATCH 01/26] perf trace: Add total time column to summary.

It is cumbersome to manually calculate the total time spent in a given
syscall by multiplying the average value by the number of calls. Instead,
we now do this directly inside perf trace.

Note that this is also done by 'strace', which even adds a column with
relative numbers - something we could do in the future.

Example:

  perf trace -s find /some/folder > /dev/null

  Summary of events:

  find (19976), 700123 events, 100.0%, 0.000 msec

   syscall            calls     total       min       avg       max      stddev
                                (msec)    (msec)    (msec)    (msec)        (%)
   --------------- -------- --------- --------- --------- ---------     ------
   read                   4     0.006     0.001     0.002     0.003     27.42%
   write               8046     9.617     0.001     0.001     0.035      0.56%
   open               34196    40.384     0.001     0.001     0.071      0.30%
   close              68375    57.104     0.001     0.001     0.076      0.25%
   stat                   4     0.004     0.001     0.001     0.001      3.14%
   fstat              34189    27.518     0.001     0.001     0.060      0.34%
   mmap                  13     0.029     0.001     0.002     0.003     10.74%
   mprotect               6     0.018     0.002     0.003     0.005     17.04%
   munmap                 3     0.014     0.003     0.005     0.006     24.87%
   brk                   87     0.490     0.001     0.006     0.016      6.50%
   ioctl                  3     0.004     0.001     0.001     0.003     36.39%
   access                 1     0.004     0.004     0.004     0.004      0.00%
   uname                  1     0.001     0.001     0.001     0.001      0.00%
   getdents           68393   143.600     0.001     0.002     0.187      0.95%
   fchdir             68371    56.980     0.001     0.001     0.111      0.39%
   arch_prctl             1     0.001     0.001     0.001     0.001      0.00%
   openat             34184    41.737     0.001     0.001     0.102      0.41%
   newfstatat         34184    41.180     0.001     0.001     0.064      0.34%

Signed-off-by: Milian Wolff
Tested-by: Arnaldo Carvalho de Melo
LPU-Reference: 1438853069-5902-1-git-send-email-milian.wolff@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/builtin-trace.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a47497011c93..a25048c85b76 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2773,9 +2773,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,

 	printed += fprintf(fp, "\n");

-	printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
-	printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
-	printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
+	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
+	printed += fprintf(fp, "                              (msec)    (msec)    (msec)    (msec)        (%%)\n");
+	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");

 	/* each int_node is a syscall */
 	while (inode) {
@@ -2792,8 +2792,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
 			sc = &trace->syscalls.table[inode->i];
 			printed += fprintf(fp, "   %-15s", sc->name);
-			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
-					   n, min, avg);
+			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
+					   n, avg * n, min, avg);
 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
 		}

From 098d2164e3441c252eaa28906d45e16b7bf1bd2b Mon Sep 17 00:00:00 2001
From: Wang Nan
Date: Wed, 1 Jul 2015 02:13:49 +0000
Subject: [PATCH 02/26] bpf: Use correct #ifdef controller for trace_call_bpf()

Commit e1abf2cc8d5d80b41c4419368ec743ccadbb131e ("bpf: Fix the build on
BPF_SYSCALL=y && !CONFIG_TRACING kernels, make it more configurable")
updated the build condition of bpf_trace.o from CONFIG_BPF_SYSCALL to
CONFIG_BPF_EVENTS, but the corresponding #ifdef controller in
trace_events.h for trace_call_bpf() was not changed, which, in theory,
is incorrect.
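To illustrate, a minimal sketch of the failure mode, with hypothetical
names (foo.h, foo.o, CONFIG_A and CONFIG_B are illustrations, not from
the kernel tree): the header guards the extern declaration with one
config symbol while the object file is gated by another.

    /* foo.h: the guard should be CONFIG_B, which is what gates foo.o */
    #ifdef CONFIG_A
    unsigned int foo(void *ctx);
    #else
    static inline unsigned int foo(void *ctx) { return 0; }
    #endif

    /* caller.c, built with CONFIG_A=y and CONFIG_B=n: foo() is declared
     * extern, but foo.o is never compiled, so this call survives until
     * link time and fails with an undefined reference. */
    unsigned int call_foo(void *ctx)
    {
            return foo(ctx);
    }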
With current Kconfigs, we can create a .config with CONFIG_BPF_SYSCALL=y
and CONFIG_BPF_EVENTS=n by unselecting CONFIG_KPROBE_EVENT and selecting
CONFIG_BPF_SYSCALL. With these options, trace_call_bpf() is declared as
an extern function, but if anything calls it, a missing-symbol error is
triggered at link time, since bpf_trace.o was never built.

This patch changes the #ifdef controller for trace_call_bpf() from
CONFIG_BPF_SYSCALL to CONFIG_BPF_EVENTS. The tables below show why this
is correct.

Before this patch:

   BPF_SYSCALL   BPF_EVENTS   trace_call_bpf   bpf_trace.o
   y             y            normal           compiled
   n             n            inline           not compiled
   y             n            normal           not compiled (incorrect)
   n             y            impossible (BPF_EVENTS depends on BPF_SYSCALL)

After this patch:

   BPF_SYSCALL   BPF_EVENTS   trace_call_bpf   bpf_trace.o
   y             y            normal           compiled
   n             n            inline           not compiled
   y             n            inline           not compiled (fixed)
   n             y            impossible (BPF_EVENTS depends on BPF_SYSCALL)

So this patch doesn't break anything. QED.

Signed-off-by: Wang Nan
Cc: Alexei Starovoitov
Cc: Brendan Gregg
Cc: Daniel Borkmann
Cc: David Ahern
Cc: He Kuang
Cc: Jiri Olsa
Cc: Kaixu Xia
Cc: Masami Hiramatsu
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Zefan Li
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1435716878-189507-2-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 include/linux/trace_events.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 1063c850dbab..180dbf8720f9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -542,7 +542,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file,
 	event_triggers_post_call(file, tt);
 }

-#ifdef CONFIG_BPF_SYSCALL
+#ifdef CONFIG_BPF_EVENTS
 unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
 #else
 static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)

From 04a22fae4cbc1f7d3f7471e9b36359f98bd3f043 Mon Sep 17 00:00:00 2001
From: Wang Nan
Date: Wed, 1 Jul 2015 02:13:50 +0000
Subject: [PATCH 03/26] tracing, perf: Implement BPF programs attached to uprobes

By copying the BPF-related operations to the uprobe processing path,
this patch allows users to attach BPF programs to uprobes, just as they
can already do with kprobes. After this patch, users are allowed to use
PERF_EVENT_IOC_SET_BPF on a uprobe perf event, which makes it possible
to profile user space programs and kernel events together using BPF.

Because of this patch, CONFIG_BPF_EVENTS should be selected by
CONFIG_UPROBE_EVENT to ensure trace_call_bpf() is compiled even if
KPROBE_EVENT is not set.
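A minimal user-space sketch of the new capability (hypothetical helper;
event_fd is assumed to be a uprobe perf event opened via
perf_event_open() and prog_fd a BPF program fd returned by
bpf(BPF_PROG_LOAD, ...); error handling omitted):

    #include <linux/perf_event.h>
    #include <sys/ioctl.h>

    /* Before this patch, the kernel rejected this ioctl with -EINVAL
     * for anything but kprobe events; now uprobe events are accepted
     * as well. */
    static int attach_bpf_to_uprobe(int event_fd, int prog_fd)
    {
            return ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
    }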
Signed-off-by: Wang Nan Acked-by: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: David Ahern Cc: He Kuang Cc: Jiri Olsa Cc: Kaixu Xia Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1435716878-189507-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/trace_events.h | 5 +++++ kernel/events/core.c | 4 ++-- kernel/trace/Kconfig | 2 +- kernel/trace/trace_uprobe.c | 5 +++++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 180dbf8720f9..ed27917cabc9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -243,6 +243,7 @@ enum { TRACE_EVENT_FL_USE_CALL_FILTER_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, TRACE_EVENT_FL_KPROBE_BIT, + TRACE_EVENT_FL_UPROBE_BIT, }; /* @@ -257,6 +258,7 @@ enum { * USE_CALL_FILTER - For trace internal events, don't use file filter * TRACEPOINT - Event is a tracepoint * KPROBE - Event is a kprobe + * UPROBE - Event is a uprobe */ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -267,8 +269,11 @@ enum { TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), + TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), }; +#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE) + struct trace_event_call { struct list_head list; struct trace_event_class *class; diff --git a/kernel/events/core.c b/kernel/events/core.c index bdea12924b11..77f9e5d0e2d1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6846,8 +6846,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) if (event->tp_event->prog) return -EEXIST; - if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) - /* bpf programs can only be attached to kprobes */ + if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE)) + /* bpf programs can only be attached to u/kprobes */ return -EINVAL; prog = bpf_prog_get(prog_fd); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3b9a48ae153a..1153c43428f3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -434,7 +434,7 @@ config UPROBE_EVENT config BPF_EVENTS depends on BPF_SYSCALL - depends on KPROBE_EVENT + depends on KPROBE_EVENT || UPROBE_EVENT bool default y help diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index aa1ea7b36fa8..f97479f1ce35 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1095,11 +1095,15 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, { struct trace_event_call *call = &tu->tp.call; struct uprobe_trace_entry_head *entry; + struct bpf_prog *prog = call->prog; struct hlist_head *head; void *data; int size, esize; int rctx; + if (prog && !trace_call_bpf(prog, regs)) + return; + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; @@ -1289,6 +1293,7 @@ static int register_uprobe_event(struct trace_uprobe *tu) return -ENODEV; } + call->flags = TRACE_EVENT_FL_UPROBE; call->class->reg = trace_uprobe_register; call->data = tu; ret = trace_add_event_call(call); From 0af0885ef69c182d1fa6bb201cd0570e9aa384eb Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 31 Jul 2015 10:35:33 -0300 Subject: [PATCH 04/26] perf tools: Introduce veprintf va_args alternative to eprintf(). 
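This lets callers that already hold a va_list forward it into perf's
debug output machinery. A hypothetical wrapper (my_pr_debug() is an
illustration, not part of this patch) could look like:

    #include <stdarg.h>
    #include "util/debug.h"

    /* Forward our own varargs to veprintf() instead of duplicating
     * _eprintf(), which is static to debug.c; 'verbose' is perf's
     * global verbosity level. */
    static int my_pr_debug(const char *fmt, ...)
    {
            va_list args;
            int ret;

            va_start(args, fmt);
            ret = veprintf(1, verbose, fmt, args);
            va_end(args);
            return ret;
    }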
Signed-off-by: Wang Nan
Acked-by: Alexei Starovoitov
Cc: Brendan Gregg
Cc: Daniel Borkmann
Cc: David Ahern
Cc: He Kuang
Cc: Jiri Olsa
Cc: Kaixu Xia
Cc: Masami Hiramatsu
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Zefan Li
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/n/1436445342-1402-19-git-send-email-wangnan0@huawei.com
[ split from another patch ]
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/debug.c | 5 +++++
 tools/perf/util/debug.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 2da5581ec74d..86d9c7302598 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
 	return ret;
 }

+int veprintf(int level, int var, const char *fmt, va_list args)
+{
+	return _eprintf(level, var, fmt, args);
+}
+
 int eprintf(int level, int var, const char *fmt, ...)
 {
 	va_list args;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index caac2fdc6105..8b9a088c32ab 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...);

 int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
 int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
+int veprintf(int level, int var, const char *fmt, va_list args);

 int perf_debug_option(const char *str);

From 5a023b57a8e96327925a39312bccc443a7c540b6 Mon Sep 17 00:00:00 2001
From: Wang Nan
Date: Fri, 19 Jun 2015 08:42:48 +0000
Subject: [PATCH 05/26] perf tools: Add missing forward declaration of struct map to probe-event.h

Commit 7b6ff0bdbf4f7f429c2116cca92a6d171217449e ("perf probe ppc64le:
Fixup function entry if using kallsyms lookup") adds 'struct map' to
probe-event.h but does not forward declare it. This patch fixes that.

Signed-off-by: Wang Nan
Cc: Alexei Starovoitov
Cc: Brendan Gregg
Cc: Daniel Borkmann
Cc: David Ahern
Cc: He Kuang
Cc: Jiri Olsa
Cc: Kaixu Xia
Cc: Masami Hiramatsu
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Zefan Li
Cc: pi3orama@163.com
Fixes: 7b6ff0bdbf4f ("perf probe ppc64le: Fixup function entry if using kallsyms lookup")
Link: http://lkml.kernel.org/n/1436445342-1402-30-git-send-email-wangnan0@huawei.com
[ No need to include map.h, just forward declare 'struct map' ]
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/probe-event.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 20f555d1ae1c..83ee95e9743b 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -106,6 +106,8 @@ struct variable_list {
 	struct strlist *vars; /* Available variables */
 };

+struct map;
+
 /* Command string to events */
 extern int parse_perf_probe_command(const char *cmd,
 				    struct perf_probe_event *pev);

From 421a50f3fafaf271bb3293378eaafca71337dfec Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Tue, 21 Jul 2015 14:31:22 +0200
Subject: [PATCH 06/26] perf stat: Introduce struct perf_stat_config

Moving 'aggr_mode' into a new struct. The point is to centralize the
base stat config so it could be used locally together with other stat
routines in other parts of the perf code.
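The pattern the series works toward can be sketched as follows (the
consumer function is hypothetical, not part of this patch): stat helpers
take one explicit config argument instead of reaching for file-local
globals in builtin-stat.c.

    #include <stdio.h>
    #include "util/stat.h"

    /* Hypothetical consumer: everything it needs to know about how the
     * counts were collected travels in the config argument. */
    static void print_aggr_mode(struct perf_stat_config *config, FILE *fp)
    {
            if (config->aggr_mode == AGGR_GLOBAL)
                    fprintf(fp, "counts aggregated globally\n");
    }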
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1437481927-29538-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 39 ++++++++++++++++++++++----------------- tools/perf/util/stat.h | 4 ++++ 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d99d850e1444..bafb830b1bd9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -102,7 +102,6 @@ static struct target target = { static int run_count = 1; static bool no_inherit = false; static bool scale = true; -static enum aggr_mode aggr_mode = AGGR_GLOBAL; static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; @@ -126,6 +125,10 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu); static volatile int done = 0; +static struct perf_stat_config stat_config = { + .aggr_mode = AGGR_GLOBAL, +}; + static inline void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b) { @@ -230,7 +233,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, if (skip) count = &zero; - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_THREAD: case AGGR_CORE: case AGGR_SOCKET: @@ -238,7 +241,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, if (!evsel->snapshot) perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, scale, NULL); - if (aggr_mode == AGGR_NONE) + if (stat_config.aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->values, cpu); break; case AGGR_GLOBAL: @@ -291,7 +294,7 @@ static int process_counter(struct perf_evsel *counter) if (ret) return ret; - if (aggr_mode != AGGR_GLOBAL) + if (stat_config.aggr_mode != AGGR_GLOBAL) return 0; if (!counter->snapshot) @@ -578,7 +581,7 @@ static void print_noise(struct perf_evsel *evsel, double avg) static void aggr_printout(struct perf_evsel *evsel, int id, int nr) { - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_CORE: fprintf(output, "S%d-C%*d%s%*d%s", cpu_map__id_to_socket(id), @@ -670,7 +673,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) aggr_printout(evsel, id, nr); - if (aggr_mode == AGGR_GLOBAL) + if (stat_config.aggr_mode == AGGR_GLOBAL) cpu = 0; fprintf(output, fmt, avg, csv_sep); @@ -688,7 +691,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (csv_output || interval) return; - perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); + perf_stat__print_shadow_stats(output, evsel, avg, cpu, + stat_config.aggr_mode); } static void print_aggr(char *prefix) @@ -909,7 +913,7 @@ static void print_interval(char *prefix, struct timespec *ts) sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); if (num_print_interval == 0 && !csv_output) { - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); break; @@ -985,7 +989,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) else print_header(argc, argv); - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: print_aggr(prefix); @@ -1064,7 +1068,7 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, static int perf_stat_init_aggr_mode(void) { - switch (aggr_mode) { + switch (stat_config.aggr_mode) { case 
AGGR_SOCKET:
 		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
 			perror("cannot build socket map");
@@ -1286,7 +1290,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		     stat__set_big_num),
 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
 		    "list of cpus to monitor in system-wide"),
-	OPT_SET_UINT('A', "no-aggr", &aggr_mode,
+	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
 		    "disable CPU count aggregation", AGGR_NONE),
 	OPT_STRING('x', "field-separator", &csv_sep, "separator",
 		   "print counts with custom separator"),
@@ -1302,11 +1306,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "command to run after to the measured command"),
 	OPT_UINTEGER('I', "interval-print", &interval,
 		    "print counts at regular interval in ms (>= 100)"),
-	OPT_SET_UINT(0, "per-socket", &aggr_mode,
+	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
 		     "aggregate counts per processor socket", AGGR_SOCKET),
-	OPT_SET_UINT(0, "per-core", &aggr_mode,
+	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
 		     "aggregate counts per physical processor core", AGGR_CORE),
-	OPT_SET_UINT(0, "per-thread", &aggr_mode,
+	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
 		     "aggregate counts per thread", AGGR_THREAD),
 	OPT_UINTEGER('D', "delay", &initial_delay,
 		     "ms to wait before starting measurement after program start"),
@@ -1399,7 +1403,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		run_count = 1;
 	}

-	if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
+	if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
 		fprintf(stderr, "The --per-thread option is only available "
 			"when monitoring via -p -t options.\n");
 		parse_options_usage(NULL, options, "p", 1);
@@ -1411,7 +1415,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	 * no_aggr, cgroup are for system-wide only
 	 * --per-thread is aggregated per thread, we dont mix it with cpu mode
 	 */
-	if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) &&
+	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
+	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
 	    !target__has_cpu(&target)) {
 		fprintf(stderr, "both cgroup and no-aggregation "
 			"modes only available in system-wide mode\n");
@@ -1444,7 +1449,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	 * Initialize thread_map with comm names,
 	 * so we could print it out on output.
 	 */
-	if (aggr_mode == AGGR_THREAD)
+	if (stat_config.aggr_mode == AGGR_THREAD)
 		thread_map__read_comms(evsel_list->threads);

 	if (interval && interval < 100) {
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 1cfbe0a980ac..078bee49ccad 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -50,6 +50,10 @@ struct perf_counts {
 	struct xyarray *values;
 };

+struct perf_stat_config {
+	enum aggr_mode aggr_mode;
+};
+
 static inline struct perf_counts_values*
 perf_counts(struct perf_counts *counts, int cpu, int thread)
 {

From 711a572ea8ae7e9ab6575403c6d632d058d5cb3d Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Tue, 21 Jul 2015 14:31:23 +0200
Subject: [PATCH 07/26] perf stat: Move 'scale' into struct perf_stat_config

Moving 'scale' into struct perf_stat_config. The point is to centralize
the base stat config so it could be used locally together with other
stat routines in other parts of the perf code.
Signed-off-by: Jiri Olsa
Cc: David Ahern
Cc: Namhyung Kim
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1437481927-29538-4-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/builtin-stat.c | 12 ++++++------
 tools/perf/util/stat.h | 1 +
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index bafb830b1bd9..3fb2865e519a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -101,7 +101,6 @@ static struct target target = {

 static int run_count = 1;
 static bool no_inherit = false;
-static bool scale = true;
 static volatile pid_t child_pid = -1;
 static bool null_run = false;
 static int detailed_run = 0;
@@ -127,6 +126,7 @@ static volatile int done = 0;

 static struct perf_stat_config stat_config = {
 	.aggr_mode = AGGR_GLOBAL,
+	.scale = true,
 };

 static inline void diff_timespec(struct timespec *r, struct timespec *a,
@@ -151,7 +151,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
 {
 	struct perf_event_attr *attr = &evsel->attr;

-	if (scale)
+	if (stat_config.scale)
 		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
 				    PERF_FORMAT_TOTAL_TIME_RUNNING;

@@ -240,13 +240,13 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
 	case AGGR_NONE:
 		if (!evsel->snapshot)
 			perf_evsel__compute_deltas(evsel, cpu, thread, count);
-		perf_counts_values__scale(count, scale, NULL);
+		perf_counts_values__scale(count, stat_config.scale, NULL);
 		if (stat_config.aggr_mode == AGGR_NONE)
 			perf_stat__update_shadow_stats(evsel, count->values, cpu);
 		break;
 	case AGGR_GLOBAL:
 		aggr->val += count->val;
-		if (scale) {
+		if (stat_config.scale) {
 			aggr->ena += count->ena;
 			aggr->run += count->run;
 		}
@@ -299,7 +299,7 @@ static int process_counter(struct perf_evsel *counter)

 	if (!counter->snapshot)
 		perf_evsel__compute_deltas(counter, -1, -1, aggr);
-	perf_counts_values__scale(aggr, scale, &counter->counts->scaled);
+	perf_counts_values__scale(aggr, stat_config.scale, &counter->counts->scaled);

 	for (i = 0; i < 3; i++)
 		update_stats(&ps->res_stats[i], count[i]);
@@ -1274,7 +1274,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "system-wide collection from all CPUs"),
 	OPT_BOOLEAN('g', "group", &group,
 		    "put the counters into a counter group"),
-	OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
+	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_INTEGER('r', "repeat", &run_count,
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 078bee49ccad..0a1d83faa7b9 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -52,6 +52,7 @@ struct perf_counts {

 struct perf_stat_config {
 	enum aggr_mode aggr_mode;
+	bool scale;
 };

 static inline struct perf_counts_values*

From 5821522e9484a8b503f89aa546085900b99589e9 Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Tue, 21 Jul 2015 14:31:24 +0200
Subject: [PATCH 08/26] perf stat: Move 'output' into struct perf_stat_config

Moving 'output' into struct perf_stat_config. The point is to centralize
the base stat config so it could be used locally together with other
stat routines in other parts of the perf code.
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1437481927-29538-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 35 +++++++++++++++++++++++------------ tools/perf/util/stat.h | 1 + 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3fb2865e519a..e3ea8b67703d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -110,7 +110,6 @@ static int big_num_opt = -1; static const char *csv_sep = NULL; static bool csv_output = false; static bool group = false; -static FILE *output = NULL; static const char *pre_cmd = NULL; static const char *post_cmd = NULL; static bool sync_run = false; @@ -305,7 +304,7 @@ static int process_counter(struct perf_evsel *counter) update_stats(&ps->res_stats[i], count[i]); if (verbose) { - fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + fprintf(stat_config.output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", perf_evsel__name(counter), count[0], count[1], count[2]); } @@ -548,13 +547,13 @@ static int run_perf_stat(int argc, const char **argv) static void print_running(u64 run, u64 ena) { if (csv_output) { - fprintf(output, "%s%" PRIu64 "%s%.2f", + fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", csv_sep, run, csv_sep, ena ? 100.0 * run / ena : 100.0); } else if (run != ena) { - fprintf(output, " (%.2f%%)", 100.0 * run / ena); + fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); } } @@ -563,9 +562,9 @@ static void print_noise_pct(double total, double avg) double pct = rel_stddev_stats(total, avg); if (csv_output) - fprintf(output, "%s%.2f%%", csv_sep, pct); + fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); else if (pct) - fprintf(output, " ( +-%6.2f%% )", pct); + fprintf(stat_config.output, " ( +-%6.2f%% )", pct); } static void print_noise(struct perf_evsel *evsel, double avg) @@ -583,7 +582,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) { switch (stat_config.aggr_mode) { case AGGR_CORE: - fprintf(output, "S%d-C%*d%s%*d%s", + fprintf(stat_config.output, "S%d-C%*d%s%*d%s", cpu_map__id_to_socket(id), csv_output ? 0 : -8, cpu_map__id_to_cpu(id), @@ -593,7 +592,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_SOCKET: - fprintf(output, "S%*d%s%*d%s", + fprintf(stat_config.output, "S%*d%s%*d%s", csv_output ? 0 : -5, id, csv_sep, @@ -602,12 +601,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_sep); break; case AGGR_NONE: - fprintf(output, "CPU%*d%s", + fprintf(stat_config.output, "CPU%*d%s", csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[id], csv_sep); break; case AGGR_THREAD: - fprintf(output, "%*s-%*d%s", + fprintf(stat_config.output, "%*s-%*d%s", csv_output ? 0 : 16, thread_map__comm(evsel->threads, id), csv_output ? 
0 : -8, @@ -622,6 +621,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { + FILE *output = stat_config.output; double msecs = avg / 1e6; const char *fmt_v, *fmt_n; char name[25]; @@ -658,6 +658,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { + FILE *output = stat_config.output; double sc = evsel->scale; const char *fmt; int cpu = cpu_map__id_to_cpu(id); @@ -697,6 +698,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) static void print_aggr(char *prefix) { + FILE *output = stat_config.output; struct perf_evsel *counter; int cpu, cpu2, s, s2, id, nr; double uval; @@ -765,6 +767,7 @@ static void print_aggr(char *prefix) static void print_aggr_thread(struct perf_evsel *counter, char *prefix) { + FILE *output = stat_config.output; int nthreads = thread_map__nr(counter->threads); int ncpus = cpu_map__nr(counter->cpus); int cpu, thread; @@ -803,6 +806,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) */ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) { + FILE *output = stat_config.output; struct perf_stat *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; @@ -854,6 +858,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) */ static void print_counter(struct perf_evsel *counter, char *prefix) { + FILE *output = stat_config.output; u64 ena, run, val; double uval; int cpu; @@ -908,6 +913,7 @@ static void print_counter(struct perf_evsel *counter, char *prefix) static void print_interval(char *prefix, struct timespec *ts) { + FILE *output = stat_config.output; static int num_print_interval; sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); @@ -938,6 +944,7 @@ static void print_interval(char *prefix, struct timespec *ts) static void print_header(int argc, const char **argv) { + FILE *output = stat_config.output; int i; fflush(stdout); @@ -967,6 +974,8 @@ static void print_header(int argc, const char **argv) static void print_footer(void) { + FILE *output = stat_config.output; + if (!null_run) fprintf(output, "\n"); fprintf(output, " %17.9f seconds time elapsed", @@ -1013,7 +1022,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (!interval && !csv_output) print_footer(); - fflush(output); + fflush(stat_config.output); } static volatile int signr = -1; @@ -1322,6 +1331,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) }; int status = -EINVAL, run_idx; const char *mode; + FILE *output = stderr; setlocale(LC_ALL, ""); @@ -1332,7 +1342,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, options, stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); - output = stderr; if (output_name && strcmp(output_name, "-")) output = NULL; @@ -1369,6 +1378,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } } + stat_config.output = output; + if (csv_sep) { csv_output = true; if (!strcmp(csv_sep, "\\t")) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0a1d83faa7b9..ed0e05829cb0 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -53,6 +53,7 @@ struct perf_counts { struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; + FILE *output; }; 
static inline struct perf_counts_values*

From ec0d3d1fd292adb80372193c03d859e9cbefd367 Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Tue, 21 Jul 2015 14:31:25 +0200
Subject: [PATCH 09/26] perf stat: Move 'interval' into struct perf_stat_config

Moving 'interval' into struct perf_stat_config. The point is to
centralize the base stat config so it could be used locally together
with other stat routines in other parts of the perf code.

Signed-off-by: Jiri Olsa
Cc: David Ahern
Cc: Namhyung Kim
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1437481927-29538-6-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/builtin-stat.c | 14 +++++++++-----
 tools/perf/util/stat.h | 1 +
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e3ea8b67703d..1bdfec8f5fe6 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -113,7 +113,6 @@ static bool group = false;
 static const char *pre_cmd = NULL;
 static const char *post_cmd = NULL;
 static bool sync_run = false;
-static unsigned int interval = 0;
 static unsigned int initial_delay = 0;
 static unsigned int unit_width = 4; /* strlen("unit") */
 static bool forever = false;
@@ -404,6 +403,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf

 static int __run_perf_stat(int argc, const char **argv)
 {
+	int interval = stat_config.interval;
 	char msg[512];
 	unsigned long long t0, t1;
 	struct perf_evsel *counter;
@@ -646,7 +646,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

-	if (csv_output || interval)
+	if (csv_output || stat_config.interval)
 		return;

 	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -689,7 +689,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

-	if (csv_output || interval)
+	if (csv_output || stat_config.interval)
 		return;

 	perf_stat__print_shadow_stats(output, evsel, avg, cpu,
@@ -990,6 +990,7 @@ static void print_footer(void)

 static void print_counters(struct timespec *ts, int argc, const char **argv)
 {
+	int interval = stat_config.interval;
 	struct perf_evsel *counter;
 	char buf[64], *prefix = NULL;

@@ -1029,7 +1030,7 @@ static volatile int signr = -1;

 static void skip_signal(int signo)
 {
-	if ((child_pid == -1) || interval)
+	if ((child_pid == -1) || stat_config.interval)
 		done = 1;

 	signr = signo;
@@ -1313,7 +1314,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "command to run prior to the measured command"),
 	OPT_STRING(0, "post", &post_cmd, "command",
 		    "command to run after to the measured command"),
-	OPT_UINTEGER('I', "interval-print", &interval,
+	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
 		    "print counts at regular interval in ms (>= 100)"),
 	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
 		     "aggregate counts per processor socket", AGGR_SOCKET),
@@ -1332,6 +1333,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	int status = -EINVAL, run_idx;
 	const char *mode;
 	FILE *output = stderr;
+	unsigned int interval;

 	setlocale(LC_ALL, "");

@@ -1342,6 +1344,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	argc = parse_options(argc, argv, options, stat_usage,
 		PARSE_OPT_STOP_AT_NON_OPTION);

+	interval = stat_config.interval;
+
 	if (output_name && strcmp(output_name, "-"))
 		output =
NULL; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index ed0e05829cb0..1da706d848fb 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -54,6 +54,7 @@ struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; FILE *output; + unsigned int interval; }; static inline struct perf_counts_values* From 5e5fe748bec771a810b1f44ec9c19e4b92685246 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 21 Jul 2015 14:31:26 +0200 Subject: [PATCH 10/26] perf stat: Pass 'struct perf_stat_config' into process_counter() Passing 'struct perf_stat_config' into process_counter(), so that we can make process_counter() non static and use it from other places. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1437481927-29538-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1bdfec8f5fe6..5a781718c09f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -216,7 +216,8 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) } static int -process_counter_values(struct perf_evsel *evsel, int cpu, int thread, +process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel, + int cpu, int thread, struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; @@ -231,20 +232,20 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, if (skip) count = &zero; - switch (stat_config.aggr_mode) { + switch (config->aggr_mode) { case AGGR_THREAD: case AGGR_CORE: case AGGR_SOCKET: case AGGR_NONE: if (!evsel->snapshot) perf_evsel__compute_deltas(evsel, cpu, thread, count); - perf_counts_values__scale(count, stat_config.scale, NULL); - if (stat_config.aggr_mode == AGGR_NONE) + perf_counts_values__scale(count, config->scale, NULL); + if (config->aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->values, cpu); break; case AGGR_GLOBAL: aggr->val += count->val; - if (stat_config.scale) { + if (config->scale) { aggr->ena += count->ena; aggr->run += count->run; } @@ -255,7 +256,8 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread, return 0; } -static int process_counter_maps(struct perf_evsel *counter) +static int process_counter_maps(struct perf_stat_config *config, + struct perf_evsel *counter) { int nthreads = thread_map__nr(counter->threads); int ncpus = perf_evsel__nr_cpus(counter); @@ -266,7 +268,7 @@ static int process_counter_maps(struct perf_evsel *counter) for (thread = 0; thread < nthreads; thread++) { for (cpu = 0; cpu < ncpus; cpu++) { - if (process_counter_values(counter, cpu, thread, + if (process_counter_values(config, counter, cpu, thread, perf_counts(counter->counts, cpu, thread))) return -1; } @@ -275,7 +277,8 @@ static int process_counter_maps(struct perf_evsel *counter) return 0; } -static int process_counter(struct perf_evsel *counter) +static int process_counter(struct perf_stat_config *config, + struct perf_evsel *counter) { struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat *ps = counter->priv; @@ -288,22 +291,22 @@ static int process_counter(struct perf_evsel *counter) if (counter->per_pkg) zero_per_pkg(counter); - ret = process_counter_maps(counter); + ret = process_counter_maps(&stat_config, counter); if (ret) return ret; - if 
(stat_config.aggr_mode != AGGR_GLOBAL) + if (config->aggr_mode != AGGR_GLOBAL) return 0; if (!counter->snapshot) perf_evsel__compute_deltas(counter, -1, -1, aggr); - perf_counts_values__scale(aggr, stat_config.scale, &counter->counts->scaled); + perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); for (i = 0; i < 3; i++) update_stats(&ps->res_stats[i], count[i]); if (verbose) { - fprintf(stat_config.output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", perf_evsel__name(counter), count[0], count[1], count[2]); } @@ -352,7 +355,7 @@ static void read_counters(bool close_counters) if (read_counter(counter)) pr_warning("failed to read counter %s\n", counter->name); - if (process_counter(counter)) + if (process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); if (close_counters) { From f80010eb230b94e8d9cf5bf83373a097fb5b2dcc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 21 Jul 2015 14:31:27 +0200 Subject: [PATCH 11/26] perf stat: Move counter processing code into stat object Moving counter processing code into stat object as perf_stat__process_counter. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1437481927-29538-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 141 +------------------------------------- tools/perf/util/stat.c | 139 +++++++++++++++++++++++++++++++++++++ tools/perf/util/stat.h | 3 + 3 files changed, 143 insertions(+), 140 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5a781718c09f..a054ddc0b2a0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -179,145 +179,6 @@ static inline int nsec_counter(struct perf_evsel *evsel) return 0; } -static void zero_per_pkg(struct perf_evsel *counter) -{ - if (counter->per_pkg_mask) - memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); -} - -static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) -{ - unsigned long *mask = counter->per_pkg_mask; - struct cpu_map *cpus = perf_evsel__cpus(counter); - int s; - - *skip = false; - - if (!counter->per_pkg) - return 0; - - if (cpu_map__empty(cpus)) - return 0; - - if (!mask) { - mask = zalloc(MAX_NR_CPUS); - if (!mask) - return -ENOMEM; - - counter->per_pkg_mask = mask; - } - - s = cpu_map__get_socket(cpus, cpu); - if (s < 0) - return -1; - - *skip = test_and_set_bit(s, mask) == 1; - return 0; -} - -static int -process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel, - int cpu, int thread, - struct perf_counts_values *count) -{ - struct perf_counts_values *aggr = &evsel->counts->aggr; - static struct perf_counts_values zero; - bool skip = false; - - if (check_per_pkg(evsel, cpu, &skip)) { - pr_err("failed to read per-pkg counter\n"); - return -1; - } - - if (skip) - count = &zero; - - switch (config->aggr_mode) { - case AGGR_THREAD: - case AGGR_CORE: - case AGGR_SOCKET: - case AGGR_NONE: - if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, thread, count); - perf_counts_values__scale(count, config->scale, NULL); - if (config->aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->values, cpu); - break; - case AGGR_GLOBAL: - aggr->val += count->val; - if (config->scale) { - aggr->ena += count->ena; - aggr->run += count->run; - } - default: - break; - } - - return 0; -} - -static int process_counter_maps(struct 
perf_stat_config *config, - struct perf_evsel *counter) -{ - int nthreads = thread_map__nr(counter->threads); - int ncpus = perf_evsel__nr_cpus(counter); - int cpu, thread; - - if (counter->system_wide) - nthreads = 1; - - for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { - if (process_counter_values(config, counter, cpu, thread, - perf_counts(counter->counts, cpu, thread))) - return -1; - } - } - - return 0; -} - -static int process_counter(struct perf_stat_config *config, - struct perf_evsel *counter) -{ - struct perf_counts_values *aggr = &counter->counts->aggr; - struct perf_stat *ps = counter->priv; - u64 *count = counter->counts->aggr.values; - int i, ret; - - aggr->val = aggr->ena = aggr->run = 0; - init_stats(ps->res_stats); - - if (counter->per_pkg) - zero_per_pkg(counter); - - ret = process_counter_maps(&stat_config, counter); - if (ret) - return ret; - - if (config->aggr_mode != AGGR_GLOBAL) - return 0; - - if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, -1, aggr); - perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); - - for (i = 0; i < 3; i++) - update_stats(&ps->res_stats[i], count[i]); - - if (verbose) { - fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), count[0], count[1], count[2]); - } - - /* - * Save the full runtime - to allow normalization during printout: - */ - perf_stat__update_shadow_stats(counter, count, 0); - - return 0; -} - /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode @@ -355,7 +216,7 @@ static void read_counters(bool close_counters) if (read_counter(counter)) pr_warning("failed to read counter %s\n", counter->name); - if (process_counter(&stat_config, counter)) + if (perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); if (close_counters) { diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index f2a0d1521e26..c5c709cdc3ce 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -238,3 +238,142 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist) perf_evsel__reset_counts(evsel); } } + +static void zero_per_pkg(struct perf_evsel *counter) +{ + if (counter->per_pkg_mask) + memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); +} + +static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) +{ + unsigned long *mask = counter->per_pkg_mask; + struct cpu_map *cpus = perf_evsel__cpus(counter); + int s; + + *skip = false; + + if (!counter->per_pkg) + return 0; + + if (cpu_map__empty(cpus)) + return 0; + + if (!mask) { + mask = zalloc(MAX_NR_CPUS); + if (!mask) + return -ENOMEM; + + counter->per_pkg_mask = mask; + } + + s = cpu_map__get_socket(cpus, cpu); + if (s < 0) + return -1; + + *skip = test_and_set_bit(s, mask) == 1; + return 0; +} + +static int +process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel, + int cpu, int thread, + struct perf_counts_values *count) +{ + struct perf_counts_values *aggr = &evsel->counts->aggr; + static struct perf_counts_values zero; + bool skip = false; + + if (check_per_pkg(evsel, cpu, &skip)) { + pr_err("failed to read per-pkg counter\n"); + return -1; + } + + if (skip) + count = &zero; + + switch (config->aggr_mode) { + case AGGR_THREAD: + case AGGR_CORE: + case AGGR_SOCKET: + case AGGR_NONE: + if (!evsel->snapshot) + perf_evsel__compute_deltas(evsel, cpu, thread, count); + perf_counts_values__scale(count, config->scale, NULL); + 
if (config->aggr_mode == AGGR_NONE) + perf_stat__update_shadow_stats(evsel, count->values, cpu); + break; + case AGGR_GLOBAL: + aggr->val += count->val; + if (config->scale) { + aggr->ena += count->ena; + aggr->run += count->run; + } + default: + break; + } + + return 0; +} + +static int process_counter_maps(struct perf_stat_config *config, + struct perf_evsel *counter) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = perf_evsel__nr_cpus(counter); + int cpu, thread; + + if (counter->system_wide) + nthreads = 1; + + for (thread = 0; thread < nthreads; thread++) { + for (cpu = 0; cpu < ncpus; cpu++) { + if (process_counter_values(config, counter, cpu, thread, + perf_counts(counter->counts, cpu, thread))) + return -1; + } + } + + return 0; +} + +int perf_stat_process_counter(struct perf_stat_config *config, + struct perf_evsel *counter) +{ + struct perf_counts_values *aggr = &counter->counts->aggr; + struct perf_stat *ps = counter->priv; + u64 *count = counter->counts->aggr.values; + int i, ret; + + aggr->val = aggr->ena = aggr->run = 0; + init_stats(ps->res_stats); + + if (counter->per_pkg) + zero_per_pkg(counter); + + ret = process_counter_maps(config, counter); + if (ret) + return ret; + + if (config->aggr_mode != AGGR_GLOBAL) + return 0; + + if (!counter->snapshot) + perf_evsel__compute_deltas(counter, -1, -1, aggr); + perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); + + for (i = 0; i < 3; i++) + update_stats(&ps->res_stats[i], count[i]); + + if (verbose) { + fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + perf_evsel__name(counter), count[0], count[1], count[2]); + } + + /* + * Save the full runtime - to allow normalization during printout: + */ + perf_stat__update_shadow_stats(counter, count, 0); + + return 0; +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 1da706d848fb..0b897b083682 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -116,4 +116,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); void perf_evlist__reset_stats(struct perf_evlist *evlist); + +int perf_stat_process_counter(struct perf_stat_config *config, + struct perf_evsel *counter); #endif From 93df8a1ed6231727c5db94a80b1a6bd5ee67cec3 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 4 Aug 2015 17:10:27 +0100 Subject: [PATCH 12/26] perf tools: Add empty Build files for architectures lacking them perf currently fails to build on MIPS as there is no tools/perf/arch/mips/Build file. Adding an empty file fixes this as there are no MIPS-specific sources to build. It looks like the same is needed for Alpha and PA-RISC, though I haven't been able to test those. 
Signed-off-by: Ben Hutchings
Fixes: 5e8c0fb6a957 ("perf build: Add arch x86 objects building")
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1438704627.7315.2.camel@decadent.org.uk
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/arch/alpha/Build | 1 +
 tools/perf/arch/mips/Build | 1 +
 tools/perf/arch/parisc/Build | 1 +
 3 files changed, 3 insertions(+)
 create mode 100644 tools/perf/arch/alpha/Build
 create mode 100644 tools/perf/arch/mips/Build
 create mode 100644 tools/perf/arch/parisc/Build

diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/alpha/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/mips/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build
new file mode 100644
index 000000000000..1bb8bf6d7fd4
--- /dev/null
+++ b/tools/perf/arch/parisc/Build
@@ -0,0 +1 @@
+# empty

From 0e332f033a8216fa03792fde69882f66500848c7 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Sat, 18 Jul 2015 08:24:46 -0700
Subject: [PATCH 13/26] perf tools: Add support for cycles, weight branch_info field

cycles is a new branch_info field available on some CPUs that indicates
the time deltas between branches in the LBR.

Add a sort key and output code for the cycles to allow displaying the
basic block cycles individually in perf report.

We also pass in the cycles for weight when LBRs are processed, which
allows computing global and local weight, to get an estimate of the
total cost.

Also print the cycles information for 'perf report -D'. I also added
printing for the previously missing LBR flags (mispredict etc.).

Signed-off-by: Andi Kleen
Acked-by: Jiri Olsa
Cc: Namhyung Kim
Link: http://lkml.kernel.org/r/1437233094-12844-2-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/Documentation/perf-report.txt | 1 +
 tools/perf/util/event.h | 3 ++-
 tools/perf/util/hist.c | 3 ++-
 tools/perf/util/hist.h | 1 +
 tools/perf/util/session.c | 16 ++++++++++++----
 tools/perf/util/sort.c | 24 ++++++++++++++++++++++++
 tools/perf/util/sort.h | 1 +
 7 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index c33b69f3374f..960da203ec11 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -109,6 +109,7 @@ OPTIONS
 	- mispredict: "N" for predicted branch, "Y" for mispredicted branch
 	- in_tx: branch in TSX transaction
 	- abort: TSX transaction abort.
+	- cycles: Cycles in basic block

 	And default sort keys are changed to comm, dso_from, symbol_from, dso_to
 	and symbol_to, see '--branch-stack'.
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 4bb2ae894c78..f729df5e25e6 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -134,7 +134,8 @@ struct branch_flags {
 	u64 predicted:1;
 	u64 in_tx:1;
 	u64 abort:1;
-	u64 reserved:60;
+	u64 cycles:16;
+	u64 reserved:44;
 };

 struct branch_entry {
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6f28d53d4e46..54fc0033dd6a 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -618,7 +618,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
 	 * and not events sampled. Thus we use a pseudo period of 1.
*/ he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, - 1, 1, 0, true); + 1, bi->flags.cycles ? bi->flags.cycles : 1, + 0, true); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5ed8d9c22981..3881d9815309 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -47,6 +47,7 @@ enum hist_column { HISTC_MEM_SNOOP, HISTC_MEM_DCACHELINE, HISTC_TRANSACTION, + HISTC_CYCLES, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f51eb54aeeb3..18722e774a69 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -784,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample) printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); - for (i = 0; i < sample->branch_stack->nr; i++) - printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", - i, sample->branch_stack->entries[i].from, - sample->branch_stack->entries[i].to); + for (i = 0; i < sample->branch_stack->nr; i++) { + struct branch_entry *e = &sample->branch_stack->entries[i]; + + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", + i, e->from, e->to, + e->flags.cycles, + e->flags.mispred ? "M" : " ", + e->flags.predicted ? "P" : " ", + e->flags.abort ? "A" : " ", + e->flags.in_tx ? "T" : " ", + (unsigned)e->flags.reserved); + } } static void regs_dump__printf(u64 mask, u64 *regs) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 4c65a143a34c..5b7a50c04e45 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -526,6 +526,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, out); } +static int64_t +sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->branch_info->flags.cycles - + right->branch_info->flags.cycles; +} + +static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + if (he->branch_info->flags.cycles == 0) + return repsep_snprintf(bf, size, "%-*s", width, "-"); + return repsep_snprintf(bf, size, "%-*hd", width, + he->branch_info->flags.cycles); +} + +struct sort_entry sort_cycles = { + .se_header = "Basic Block Cycles", + .se_cmp = sort__cycles_cmp, + .se_snprintf = hist_entry__cycles_snprintf, + .se_width_idx = HISTC_CYCLES, +}; + /* --sort daddr_sym */ static int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) @@ -1190,6 +1213,7 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), DIM(SORT_IN_TX, "in_tx", sort_in_tx), DIM(SORT_ABORT, "abort", sort_abort), + DIM(SORT_CYCLES, "cycles", sort_cycles), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e97cd476d336..bc6c87a76d16 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -185,6 +185,7 @@ enum sort_type { SORT_MISPREDICT, SORT_ABORT, SORT_IN_TX, + SORT_CYCLES, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, From 98df858ed46ddaaf9be3573eb2b63b57a68c6af7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:47 -0700 Subject: [PATCH 14/26] perf report: Add flag for non ANY branch mode Later patches need to cheaply check that the branch mode is in ANY. Add a new function to check all event attrs and add a flag to the report state, which is then initialized. 
v2: Rename flag Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 7 +++++++ tools/perf/util/evlist.c | 10 ++++++++++ tools/perf/util/evlist.h | 1 + 3 files changed, 18 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 95a47719aec3..3ba0e9737dc5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,7 @@ struct report { bool mem_mode; bool header; bool header_only; + bool nonany_branch_mode; int max_stack; struct perf_read_values show_threads_values; const char *pretty_printing_style; @@ -258,6 +259,12 @@ static int report__setup_sample_type(struct report *rep) else callchain_param.record_mode = CALLCHAIN_FP; } + + /* ??? handle more cases than just ANY? */ + if (!(perf_evlist__combined_branch_type(session->evlist) & + PERF_SAMPLE_BRANCH_ANY)) + rep->nonany_branch_mode = true; + return 0; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3b9f411a6b46..373f65b02545 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1273,6 +1273,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) return __perf_evlist__combined_sample_type(evlist); } +u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + u64 branch_type = 0; + + evlist__for_each(evlist, evsel) + branch_type |= evsel->attr.branch_sample_type; + return branch_type; +} + bool perf_evlist__valid_read_format(struct perf_evlist *evlist) { struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a8930b68456b..397757063da1 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -165,6 +165,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist); u64 perf_evlist__read_format(struct perf_evlist *evlist); u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist); u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); +u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist); bool perf_evlist__sample_id_all(struct perf_evlist *evlist); u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); From d4957633bf9dab70e566e7dbb2b8d0c61c3a2f1e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:48 -0700 Subject: [PATCH 15/26] perf report: Add infrastructure for a cycles histogram This adds the basic infrastructure to keep track of cycle counts per basic block for annotate. We allocate an array similar to the normal accounting, and then account branch cycles there. We handle two cases: cycles per basic block with start and cycles per branch (these are later used for either IPC or just cycles per BB) In the start case we cannot handle overlaps, so always the longest basic block wins. For the cycles per branch case everything is accurately accounted. v2: Remove unnecessary checks. Slight restructure. Move symbol__get_annotation to another patch. Move histogram allocation. 
v3: Merged with current tree Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 1 + tools/perf/util/annotate.c | 127 +++++++++++++++++++++++++++++++++- tools/perf/util/annotate.h | 17 +++++ 3 files changed, 142 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2c1bec39c30e..467a23b14e2f 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -187,6 +187,7 @@ static void hists__find_annotations(struct hists *hists, * symbol, free he->ms.sym->src to signal we already * processed this symbol. */ + zfree(¬es->src->cycles_hist); zfree(¬es->src); } } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 03b7bc70eb66..e0b614648044 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym) return 0; } +/* The cycles histogram is lazily allocated. */ +static int symbol__alloc_hist_cycles(struct symbol *sym) +{ + struct annotation *notes = symbol__annotation(sym); + const size_t size = symbol__size(sym); + + notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); + if (notes->src->cycles_hist == NULL) + return -1; + return 0; +} + void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); pthread_mutex_lock(¬es->lock); - if (notes->src != NULL) + if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); + if (notes->src->cycles_hist) + memset(notes->src->cycles_hist, 0, + symbol__size(sym) * sizeof(struct cyc_hist)); + } pthread_mutex_unlock(¬es->lock); } +static int __symbol__account_cycles(struct annotation *notes, + u64 start, + unsigned offset, unsigned cycles, + unsigned have_start) +{ + struct cyc_hist *ch; + + ch = notes->src->cycles_hist; + /* + * For now we can only account one basic block per + * final jump. But multiple could be overlapping. + * Always account the longest one. So when + * a shorter one has been already seen throw it away. + * + * We separately always account the full cycles. 
+ */ + ch[offset].num_aggr++; + ch[offset].cycles_aggr += cycles; + + if (!have_start && ch[offset].have_start) + return 0; + if (ch[offset].num) { + if (have_start && (!ch[offset].have_start || + ch[offset].start > start)) { + ch[offset].have_start = 0; + ch[offset].cycles = 0; + ch[offset].num = 0; + if (ch[offset].reset < 0xffff) + ch[offset].reset++; + } else if (have_start && + ch[offset].start < start) + return 0; + } + ch[offset].have_start = have_start; + ch[offset].start = start; + ch[offset].cycles += cycles; + ch[offset].num++; + return 0; +} + static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, struct annotation *notes, int evidx, u64 addr) { @@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, return 0; } -static struct annotation *symbol__get_annotation(struct symbol *sym) +static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles) { struct annotation *notes = symbol__annotation(sym); @@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym) if (symbol__alloc_hist(sym) < 0) return NULL; } + if (!notes->src->cycles_hist && cycles) { + if (symbol__alloc_hist_cycles(sym) < 0) + return NULL; + } return notes; } @@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, if (sym == NULL) return 0; - notes = symbol__get_annotation(sym); + notes = symbol__get_annotation(sym, false); if (notes == NULL) return -ENOMEM; return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); } +static int symbol__account_cycles(u64 addr, u64 start, + struct symbol *sym, unsigned cycles) +{ + struct annotation *notes; + unsigned offset; + + if (sym == NULL) + return 0; + notes = symbol__get_annotation(sym, true); + if (notes == NULL) + return -ENOMEM; + if (addr < sym->start || addr >= sym->end) + return -ERANGE; + + if (start) { + if (start < sym->start || start >= sym->end) + return -ERANGE; + if (start >= addr) + start = 0; + } + offset = addr - sym->start; + return __symbol__account_cycles(notes, + start ? start - sym->start : 0, + offset, cycles, + !!start); +} + +int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, + struct addr_map_symbol *start, + unsigned cycles) +{ + unsigned long saddr = 0; + int err; + + if (!cycles) + return 0; + + /* + * Only set start when IPC can be computed. We can only + * compute it when the basic block is completely in a single + * function. + * Special case the case when the jump is elsewhere, but + * it starts on the function start. + */ + if (start && + (start->sym == ams->sym || + (ams->sym && + start->addr == ams->sym->start + ams->map->start))) + saddr = start->al_addr; + if (saddr == 0) + pr_debug2("BB with bad start: addr %lx start %lx sym %lx saddr %lx\n", + ams->addr, + start ? start->addr : 0, + ams->sym ? 
ams->sym->start + ams->map->start : 0,
+			  saddr);
+	err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
+	if (err)
+		pr_debug2("account_cycles failed %d\n", err);
+	return err;
+}
+
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx)
{
	return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr);

diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 7e78e6c27078..a06518dca4b7 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -79,6 +79,17 @@ struct sym_hist {
	u64 addr[0];
};

+struct cyc_hist {
+	u64 start;
+	u64 cycles;
+	u64 cycles_aggr;
+	u32 num;
+	u32 num_aggr;
+	u8 have_start;
+	/* 1 byte padding */
+	u16 reset;
+};
+
struct source_line_samples {
	double percent;
	double percent_sum;
@@ -97,6 +108,7 @@ struct source_line {
 * @histogram: Array of addr hit histograms per event being monitored
 * @lines: If 'print_lines' is specified, per source code line percentages
 * @source: source parsed from a disassembler like objdump -dS
+ * @cyc_hist: Average cycles per basic block
 *
 * lines is allocated, percentages calculated and all sorted by percentage
 * when the annotation is about to be presented, so the percentages are for
@@ -109,6 +121,7 @@ struct annotated_source {
	struct source_line *lines;
	int nr_histograms;
	int sizeof_sym_hist;
+	struct cyc_hist *cycles_hist;
	struct sym_hist histograms[0];
};
@@ -130,6 +143,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)

int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx);

+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+				    struct addr_map_symbol *start,
+				    unsigned cycles);
+
int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);

int symbol__alloc_hist(struct symbol *sym);

From 57849998e2cd24d50295076a1bbd2f029e2d7c38 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Sat, 18 Jul 2015 08:24:49 -0700
Subject: [PATCH 16/26] perf report: Add processing for cycle histograms

Call the previously added cycle histogram infrastructure from the perf report hist iter callback. For this we walk the branch records.

This allows using cycle histograms when browsing perf report annotate.
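For illustration, consider a hypothetical three-entry branch stack: perf stores branch records in reverse program order (newest first), so the callback walks entries [2], [1], [0] and accounts each branch's cycle count at its 'from' address, passing the previous entry's 'to' address as the start of the enclosing basic block. In non-ANY branch modes the start is always forced to NULL, so no IPC is computed for potentially discontinuous blocks.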
v2: Rename flag Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-5-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ tools/perf/util/hist.c | 33 +++++++++++++++++++++++++++++++++ tools/perf/util/hist.h | 3 +++ 3 files changed, 39 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3ba0e9737dc5..3a9d1b659fcd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -103,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, if (!ui__has_annotation()) return 0; + hist__account_cycles(iter->sample->branch_stack, al, iter->sample, + rep->nonany_branch_mode); + if (sort__mode == SORT_MODE__BRANCH) { bi = he->branch_info; err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 54fc0033dd6a..a6e9ddd37913 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1415,6 +1415,39 @@ int hists__link(struct hists *leader, struct hists *other) return 0; } +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode) +{ + struct branch_info *bi; + + /* If we have branch cycles always annotate them. */ + if (bs && bs->nr && bs->entries[0].flags.cycles) { + int i; + + bi = sample__resolve_bstack(sample, al); + if (bi) { + struct addr_map_symbol *prev = NULL; + + /* + * Ignore errors, still want to process the + * other entries. + * + * For non standard branch modes always + * force no IPC (prev == NULL) + * + * Note that perf stores branches reversed from + * program order! + */ + for (i = bs->nr - 1; i >= 0; i--) { + addr_map_symbol__account_cycles(&bi[i].from, + nonany_branch_mode ? NULL : prev, + bi[i].flags.cycles); + prev = &bi[i].to; + } + free(bi); + } + } +} size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3881d9815309..e2f712f85d2e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -350,6 +350,9 @@ static inline int script_browse(const char *script_opt __maybe_unused) unsigned int hists__sort_list_width(struct hists *hists); +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode); + struct option; int parse_filter_percentage(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused); From 30e863bb6f708c0abd422fbb0e6b295f5ee6407b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:50 -0700 Subject: [PATCH 17/26] perf annotate: Compute IPC and basic block cycles Compute the IPC and the basic block cycles for the annotate display. IPC is computed by counting the instructions, and then dividing the accounted cycles by that count. The actual IPC computation can only be done at annotate time, because we need to parse the objdump output first to know the number of instructions in the basic block. The cycles/IPC are also put into the perf function annotation so that the display code can show them. Again basic block overlaps are not handled, with the longest winning, but there are some heuristics to hide the IPC when the longest is not the most common. v2: Compute IPC correctly. 
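Worked example (made-up numbers, mirroring the count_and_fill() logic added below): a basic block spanning 4 instructions that was accounted 2 times for a total of 8 cycles averages 8 / 2 = 4 cycles per traversal, giving IPC = 4 / 4 = 1.0.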
Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-6-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 73 ++++++++++++++++++++++++++++++- tools/perf/util/annotate.h | 2 + 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5995a8bd7c69..6ec179547f72 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -53,6 +53,7 @@ struct annotate_browser { int max_jump_sources; int nr_jumps; bool searching_backwards; + bool have_cycles; u8 addr_width; u8 jumps_width; u8 target_width; @@ -390,7 +391,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, max_percent = bpos->samples[i].percent; } - if (max_percent < 0.01) { + if (max_percent < 0.01 && pos->ipc == 0) { RB_CLEAR_NODE(&bpos->rb_node); continue; } @@ -869,6 +870,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, return map_symbol__tui_annotate(&he->ms, evsel, hbt); } + +static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end) +{ + unsigned n_insn = 0; + u64 offset; + + for (offset = start; offset <= end; offset++) { + if (browser->offsets[offset]) + n_insn++; + } + return n_insn; +} + +static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, + struct cyc_hist *ch) +{ + unsigned n_insn; + u64 offset; + + n_insn = count_insn(browser, start, end); + if (n_insn && ch->num && ch->cycles) { + float ipc = n_insn / ((double)ch->cycles / (double)ch->num); + + /* Hide data when there are too many overlaps. */ + if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2) + return; + + for (offset = start; offset <= end; offset++) { + struct disasm_line *dl = browser->offsets[offset]; + + if (dl) + dl->ipc = ipc; + } + } +} + +/* + * This should probably be in util/annotate.c to share with the tty + * annotate, but right now we need the per byte offsets arrays, + * which are only here. 
+ */
+static void annotate__compute_ipc(struct annotate_browser *browser, size_t size,
+				  struct symbol *sym)
+{
+	u64 offset;
+	struct annotation *notes = symbol__annotation(sym);
+
+	if (!notes->src || !notes->src->cycles_hist)
+		return;
+
+	pthread_mutex_lock(&notes->lock);
+	for (offset = 0; offset < size; ++offset) {
+		struct cyc_hist *ch;
+
+		ch = &notes->src->cycles_hist[offset];
+		if (ch && ch->cycles) {
+			struct disasm_line *dl;
+
+			if (ch->have_start)
+				count_and_fill(browser, ch->start, offset, ch);
+			dl = browser->offsets[offset];
+			if (dl && ch->num_aggr)
+				dl->cycles = ch->cycles_aggr / ch->num_aggr;
+			browser->have_cycles = true;
+		}
+	}
+	pthread_mutex_unlock(&notes->lock);
+}
+
static void annotate_browser__mark_jump_targets(struct annotate_browser *browser,
						size_t size)
{
@@ -991,6 +1061,7 @@
	}

	annotate_browser__mark_jump_targets(&browser, size);
+	annotate__compute_ipc(&browser, size, sym);

	browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size);
	browser.max_addr_width = hex_width(sym->end);

diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index a06518dca4b7..e9996092a093 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -59,6 +59,8 @@ struct disasm_line {
	char *name;
	struct ins *ins;
	int line_nr;
+	float ipc;
+	u64 cycles;
	struct ins_operands ops;
};

From f8f4aaead579c947fb8fc051c9d242037025caf3 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Sat, 18 Jul 2015 08:24:51 -0700
Subject: [PATCH 18/26] perf annotate: Finally display IPC and cycle accounting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add two new columns to the annotate display and display the average cycles and the computed IPC if available.

When the LBR was not in any branch mode, the IPC computation is automatically disabled. We still display the cycle information.

Example output (with made up numbers); the second column is the IPC, the third the average cycles:

            │    __attribute__((noinline)) f2()
            │    {
 5.15  0.07 │      push %rbp
 0.01  0.07 │      mov  %rsp,%rbp
            │      c = a / b;
 9.87  0.07 │      mov  a,%eax
       0.07 │      mov  b,%ecx
       0.07 │      cltd
 4.92  0.07  123│      idiv %ecx
70.79  0.07 │      mov  %eax,__TMC_END__
            │    }
 9.25  0.07 │      pop  %rbp
 0.01  0.07  123│      ← retq

v2: Fix display problems.
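For reference, one plausible way to produce such output (assuming hardware whose LBR records per-branch cycles, e.g. recent Intel CPUs) would be something like: perf record -b <workload>, then perf report, then annotating a hot symbol in the browser.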
Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-7-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 57 ++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 6ec179547f72..b5fc847f9660 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -16,6 +16,9 @@ struct disasm_line_samples { u64 nr; }; +#define IPC_WIDTH 6 +#define CYCLES_WIDTH 6 + struct browser_disasm_line { struct rb_node rb_node; u32 idx; @@ -97,6 +100,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br return ui_browser__set_color(&browser->b, color); } +static int annotate_browser__pcnt_width(struct annotate_browser *ab) +{ + int w = 7 * ab->nr_events; + + if (ab->have_cycles) + w += IPC_WIDTH + CYCLES_WIDTH; + return w; +} + static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); @@ -107,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int (!current_entry || (browser->use_navkeypressed && !browser->navkeypressed))); int width = browser->width, printed; - int i, pcnt_width = 7 * ab->nr_events; + int i, pcnt_width = annotate_browser__pcnt_width(ab); double percent_max = 0.0; char bf[256]; @@ -117,19 +129,34 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } if (dl->offset != -1 && percent_max != 0.0) { - for (i = 0; i < ab->nr_events; i++) { - ui_browser__set_percent_color(browser, - bdl->samples[i].percent, - current_entry); - if (annotate_browser__opts.show_total_period) - slsmg_printf("%6" PRIu64 " ", - bdl->samples[i].nr); - else - slsmg_printf("%6.2f ", bdl->samples[i].percent); + if (percent_max != 0.0) { + for (i = 0; i < ab->nr_events; i++) { + ui_browser__set_percent_color(browser, + bdl->samples[i].percent, + current_entry); + if (annotate_browser__opts.show_total_period) + slsmg_printf("%6" PRIu64 " ", + bdl->samples[i].nr); + else + slsmg_printf("%6.2f ", bdl->samples[i].percent); + } + } else { + slsmg_write_nstring(" ", 7 * ab->nr_events); } } else { ui_browser__set_percent_color(browser, 0, current_entry); - slsmg_write_nstring(" ", pcnt_width); + slsmg_write_nstring(" ", 7 * ab->nr_events); + } + if (ab->have_cycles) { + if (dl->ipc) + slsmg_printf("%*.2f ", IPC_WIDTH - 1, dl->ipc); + else + slsmg_write_nstring(" ", IPC_WIDTH); + if (dl->cycles) + slsmg_printf("%*" PRIu64 " ", + CYCLES_WIDTH - 1, dl->cycles); + else + slsmg_write_nstring(" ", CYCLES_WIDTH); } SLsmg_write_char(' '); @@ -232,7 +259,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) unsigned int from, to; struct map_symbol *ms = ab->b.priv; struct symbol *sym = ms->sym; - u8 pcnt_width = 7; + u8 pcnt_width = annotate_browser__pcnt_width(ab); /* PLT symbols contain external offsets */ if (strstr(sym->name, "@plt")) @@ -256,8 +283,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) to = (u64)btarget->idx; } - pcnt_width *= ab->nr_events; - ui_browser__set_color(browser, HE_COLORSET_CODE); __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, from, to); @@ -267,9 +292,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) { struct annotate_browser *ab = 
container_of(browser, struct annotate_browser, b); int ret = ui_browser__list_head_refresh(browser); - int pcnt_width; - - pcnt_width = 7 * ab->nr_events; + int pcnt_width = annotate_browser__pcnt_width(ab); if (annotate_browser__opts.jump_arrows) annotate_browser__draw_current_jump(browser); From a18b027efe1a2a502d98a8d0ea0391a72bf3f696 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:52 -0700 Subject: [PATCH 19/26] perf top: Add branch annotation code to top Now that we can process branch data in annotate it makes sense to support enabling branch recording from top too. Most of the code needed for this is already in shared code with report. But we need to add: - The option parsing code (using shared code from the previous patch) - Document the options - Set up the IPC/cycles accounting state in the top session - Call the accounting code in the hist iter callback Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-8-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 21 +++++++++++++++++++++ tools/perf/builtin-top.c | 9 +++++++++ 2 files changed, 30 insertions(+) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 776aec4d0927..f6a23eb294e7 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -208,6 +208,27 @@ Default is to monitor all CPUS. This option sets the time out limit. The default value is 500 ms. +-b:: +--branch-any:: + Enable taken branch stack sampling. Any type of taken branch may be sampled. + This is a shortcut for --branch-filter any. See --branch-filter for more infos. + +-j:: +--branch-filter:: + Enable taken branch stack sampling. Each sample captures a series of consecutive + taken branches. The number of branches captured with each sample depends on the + underlying hardware, the type of branches of interest, and the executed code. + It is possible to select the types of branches captured by enabling filters. + For a full list of modifiers please see the perf record manpage. + + The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. + The privilege levels may be omitted, in which case, the privilege levels of the associated + event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege + levels are subject to permissions. When sampling on multiple events, branch stack sampling + is enabled for all the sampling events. The sampled branch type is the same for all events. + The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k + Note that this feature may not be available on all processors. 
+ INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ecf319728f25..bfe24f1e362f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -40,6 +40,7 @@ #include "util/xyarray.h" #include "util/sort.h" #include "util/intlist.h" +#include "util/parse-branch-options.h" #include "arch/common.h" #include "util/debug.h" @@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, perf_top__record_precise_ip(top, he, evsel->idx, ip); } + hist__account_cycles(iter->sample->branch_stack, al, iter->sample, + !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY)); return 0; } @@ -1171,6 +1174,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "don't try to adjust column width, use these fixed values"), OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, "per thread proc mmap processing timeout in ms"), + OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack, + "branch any", "sample any taken branches", + parse_branch_stack), + OPT_CALLBACK('j', "branch-filter", &opts->branch_stack, + "branch filter mask", "branch stack filter modes", + parse_branch_stack), OPT_END() }; const char * const top_usage[] = { From 40997d6cf9fc40c85dba479e162a89e7530eb360 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:53 -0700 Subject: [PATCH 20/26] perf report: Display cycles in branch sort mode Display the cycles by default in branch sort mode. To make enough room for the new column I removed dso_to. It is usually redundant with dso_from. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-9-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5b7a50c04e45..5177088a71d3 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -9,7 +9,7 @@ regex_t parent_regex; const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char default_sort_order[] = "comm,dso,symbol"; -const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to"; +const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; From 74d4582f430a797564f92fbff0bd3a21945528b7 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 18 Jul 2015 11:30:11 +0300 Subject: [PATCH 21/26] perf tools xtensa: Add DWARF register names Signed-off-by: Max Filippov Cc: Chris Zankel Cc: Marc Gauthier Cc: Paul Mackerras Cc: Peter Zijlstra Cc: linux-xtensa@linux-xtensa.org Link: http://lkml.kernel.org/r/1437208216-15729-9-git-send-email-jcmvbkbc@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/xtensa/Build | 1 + tools/perf/arch/xtensa/Makefile | 3 +++ tools/perf/arch/xtensa/util/Build | 1 + tools/perf/arch/xtensa/util/dwarf-regs.c | 25 ++++++++++++++++++++++++ 4 files changed, 30 insertions(+) create mode 100644 tools/perf/arch/xtensa/Build create mode 100644 tools/perf/arch/xtensa/Makefile create mode 100644 tools/perf/arch/xtensa/util/Build create mode 100644 tools/perf/arch/xtensa/util/dwarf-regs.c diff --git 
a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/xtensa/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile new file mode 100644 index 000000000000..7fbca175099e --- /dev/null +++ b/tools/perf/arch/xtensa/Makefile @@ -0,0 +1,3 @@ +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/xtensa/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c new file mode 100644 index 000000000000..4dba76bfb4ce --- /dev/null +++ b/tools/perf/arch/xtensa/util/dwarf-regs.c @@ -0,0 +1,25 @@ +/* + * Mapping of DWARF debug register numbers into register names. + * + * Copyright (c) 2015 Cadence Design Systems Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +#define XTENSA_MAX_REGS 16 + +const char *xtensa_regs_table[XTENSA_MAX_REGS] = { + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", +}; + +const char *get_arch_regstr(unsigned int n) +{ + return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL; +} From f70cfa07e3675a115265e32d6357272275358cdb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:46 +0300 Subject: [PATCH 22/26] perf auxtrace: Fix period type 'i' not working PERF_ITRACE_PERIOD_INSTRUCTIONS is zero so it got overwritten by the default period type. Fix by checking if the period type was set rather than if the value was zero when applying the default. 
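For example, with a hypothetical invocation like: perf script --itrace=i100i the user asks for synthesized instruction events with a period of 100 instructions; before this fix the instructions period type (value zero) looked unset and was silently replaced by the default period type.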
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-12-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 83d9dd96fe08..a25b3609cef8 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -942,6 +942,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, struct itrace_synth_opts *synth_opts = opt->value; const char *p; char *endptr; + bool period_type_set = false; synth_opts->set = true; @@ -970,10 +971,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'i': synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; + period_type_set = true; break; case 't': synth_opts->period_type = PERF_ITRACE_PERIOD_TICKS; + period_type_set = true; break; case 'm': synth_opts->period *= 1000; @@ -986,6 +989,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, goto out_err; synth_opts->period_type = PERF_ITRACE_PERIOD_NANOSECS; + period_type_set = true; break; case '\0': goto out; @@ -1039,7 +1043,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, } out: if (synth_opts->instructions) { - if (!synth_opts->period_type) + if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; if (!synth_opts->period) From 8bd1b2d2578ca2688969352ed1f8a0a8f10dbb63 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:47 +0300 Subject: [PATCH 23/26] perf tools: Fix perf-with-kcore handling of arguments containing spaces Fix the perf-with-kcore script so that it doesn't split arguments that contain spaces. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-13-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-with-kcore.sh | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh index c7ff90a90e4e..7e47a7cbc195 100644 --- a/tools/perf/perf-with-kcore.sh +++ b/tools/perf/perf-with-kcore.sh @@ -50,7 +50,7 @@ copy_kcore() fi rm -f perf.data.junk - ("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null & + ("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null & PERF_PID=$! 
# Need to make sure that perf has started @@ -160,18 +160,18 @@ record() echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2 fi - if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2 fi - if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2 fi - if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then + if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then true - elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then + elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then true elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2 @@ -193,8 +193,8 @@ record() mkdir "$PERF_DATA_DIR" - echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*" - "$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true + echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@" + "$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then exit 1 @@ -209,8 +209,8 @@ subcommand() { find_perf check_buildid_cache_permissions - echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*" - "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $* + echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@" + "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@" } if [ "$1" = "fix_buildid_cache_permissions" ] ; then @@ -234,7 +234,7 @@ fi case "$PERF_SUB_COMMAND" in "record") while [ "$1" != "--" ] ; do - PERF_OPTIONS+="$1 " + PERF_OPTIONS+=("$1") shift || break done if [ "$1" != "--" ] ; then @@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in usage fi shift - record $* + record "$@" ;; "script") - subcommand $* + subcommand "$@" ;; "report") - subcommand $* + subcommand "$@" ;; "inject") - subcommand $* + subcommand "$@" ;; *) usage From 09ff607176ab2bf7e038150100fdf9290a6fbe47 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:49 +0300 Subject: [PATCH 24/26] perf tools: Add perf_pmu__format_bits() Add perf_pmu__format_bits() to get the format bits for a PMU config term. Intel PT will use this to validate terms and to record format bits to enable later interpreting the config from the attribute stored in the perf.data file. 
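For illustration, given a hypothetical sysfs format definition 'config:0-7,32', perf_pmu__format_bits() would return 0x1000000ffULL, i.e. a mask with bits 0-7 and bit 32 set, independent of any value later placed in those bits.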
Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-15-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 17 ++++++++++++++++- tools/perf/util/pmu.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b615cdf211d6..c548ec89c8bc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -542,7 +542,7 @@ struct perf_pmu *perf_pmu__find(const char *name) } static struct perf_pmu_format * -pmu_find_format(struct list_head *formats, char *name) +pmu_find_format(struct list_head *formats, const char *name) { struct perf_pmu_format *format; @@ -553,6 +553,21 @@ pmu_find_format(struct list_head *formats, char *name) return NULL; } +__u64 perf_pmu__format_bits(struct list_head *formats, const char *name) +{ + struct perf_pmu_format *format = pmu_find_format(formats, name); + __u64 bits = 0; + int fbit; + + if (!format) + return 0; + + for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS) + bits |= 1ULL << fbit; + + return bits; +} + /* * Sets value based on the format definition (format parameter) * and unformated value (value parameter). diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 7b9c8cf8ae3e..5d7e84466bee 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms, bool zero, struct parse_events_error *error); +__u64 perf_pmu__format_bits(struct list_head *formats, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, struct perf_pmu_info *info); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, From 0efe6b67690b6546daa0d2f34a17eb3ca46c9dea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:50 +0300 Subject: [PATCH 25/26] perf tools: Validate config term maximum value Currently the value of a PMU config term is silently truncated if it is too big. This is an impediment to validating the value for other criteria later on i.e. the user provides an invalid value that gets truncated to a valid one. The maximum value validation is only done for the parser where the error is passed back to the user. In other cases the silent truncation continues so as not to affect tools that perhaps rely on it. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-16-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c548ec89c8bc..d4b0e6454bc6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -589,6 +589,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, } } +static __u64 pmu_format_max_value(const unsigned long *format) +{ + int w; + + w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); + if (!w) + return 0; + if (w < 64) + return (1ULL << w) - 1; + return -1; +} + /* * Term is a string term, and might be a param-term. Try to look up it's value * in the remaining terms. 
@@ -662,7 +674,7 @@ static int pmu_config_term(struct list_head *formats,
{
	struct perf_pmu_format *format;
	__u64 *vp;
-	__u64 val;
+	__u64 val, max_val;

	/*
	 * If this is a parameter we've already used for parameterized-eval,
@@ -728,6 +740,22 @@ static int pmu_config_term(struct list_head *formats,
	} else
		return -EINVAL;

+	max_val = pmu_format_max_value(format->bits);
+	if (val > max_val) {
+		if (err) {
+			err->idx = term->err_val;
+			if (asprintf(&err->str,
+				     "value too big for format, maximum is %llu",
+				     (unsigned long long)max_val) < 0)
+				err->str = strdup("value too big for format");
+			return -EINVAL;
+		}
+		/*
+		 * Assume we don't care if !err, in which case the value will be
+		 * silently truncated.
+		 */
+	}
+
	pmu_format_value(format->bits, val, vp, zero);
	return 0;
}

From 141b2d3161f19a774b3ceaa8faed5e63484a4684 Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Fri, 17 Jul 2015 19:33:51 +0300
Subject: [PATCH 26/26] perf tools: Extend the event parser maximum error index

Extend the event parser maximum error index from 10 to 13. That allows PMU config terms of up to 10 characters to be displayed untruncated in the error message.

Signed-off-by: Adrian Hunter
Cc: Jiri Olsa
Link: http://lkml.kernel.org/r/1437150840-31811-17-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/parse-events.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index a6cb9afc20e2..828936dc3f1e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1168,7 +1168,7 @@ static void parse_events_print_error(struct parse_events_error *err,
	 * Maximum error index indent, we will cut
	 * the event string if it's bigger.
	 */
-	int max_err_idx = 10;
+	int max_err_idx = 13;

	/*
	 * Let's be specific with the message when