2014-06-01 21:38:29 +08:00
|
|
|
#include <sys/mman.h>
|
2009-09-25 00:02:49 +08:00
|
|
|
#include "sort.h"
|
2010-07-21 01:42:52 +08:00
|
|
|
#include "hist.h"
|
2013-09-13 15:28:57 +08:00
|
|
|
#include "comm.h"
|
2013-04-03 20:26:19 +08:00
|
|
|
#include "symbol.h"
|
2014-03-03 10:46:55 +08:00
|
|
|
#include "evsel.h"
|
2009-09-25 00:02:49 +08:00
|
|
|
|
|
|
|
regex_t parent_regex;
|
2010-05-18 03:22:41 +08:00
|
|
|
const char default_parent_pattern[] = "^sys_|^do_page_fault";
|
|
|
|
const char *parent_pattern = default_parent_pattern;
|
|
|
|
const char default_sort_order[] = "comm,dso,symbol";
|
2014-03-18 10:31:39 +08:00
|
|
|
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to";
|
|
|
|
const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
|
|
|
|
const char default_top_sort_order[] = "dso,symbol";
|
|
|
|
const char default_diff_sort_order[] = "dso,symbol";
|
|
|
|
const char *sort_order;
|
2014-03-04 09:46:34 +08:00
|
|
|
const char *field_order;
|
2012-12-07 13:48:05 +08:00
|
|
|
regex_t ignore_callees_regex;
|
|
|
|
int have_ignore_callees = 0;
|
2009-10-23 05:23:22 +08:00
|
|
|
int sort__need_collapse = 0;
|
|
|
|
int sort__has_parent = 0;
|
2012-09-14 16:35:27 +08:00
|
|
|
int sort__has_sym = 0;
|
2013-12-18 13:21:10 +08:00
|
|
|
int sort__has_dso = 0;
|
2013-04-01 19:35:20 +08:00
|
|
|
enum sort_mode sort__mode = SORT_MODE__NORMAL;
|
perf tools: Bind callchains to the first sort dimension column
Currently, the callchains are displayed using a constant left
margin. So depending on the current sort dimension
configuration, callchains may appear to be well attached to the
first sort dimension column field which is mostly the case,
except when the first dimension of sorting is done by comm,
because these are right aligned.
This patch binds the callchain to the first letter in the first
column, whatever type of column it is (dso, comm, symbol).
Before:
0.80% perf [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
| | __fsnotify_parent
After:
0.80% perf [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
| | __fsnotify_parent
Also, for clarity, we don't put anymore the callchain as is but:
- If we have a top level ancestor in the callchain, start it
with a first ascii hook.
Before:
0.80% perf [kernel] [k] __lock_acquire
__lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
[..] [..]
After:
0.80% perf [kernel] [k] __lock_acquire
|
--- __lock_acquire
lock_acquire
|
|--58.33%-- _spin_lock
| |
| |--28.57%-- inotify_should_send_event
| | fsnotify
[..] [..]
- Otherwise, if we have several top level ancestors, then
display these like we did before:
1.69% Xorg
|
|--21.21%-- vread_hpet
| 0x7fffd85b46fc
| 0x7fffd85b494d
| 0x7f4fafb4e54d
|
|--15.15%-- exaOffscreenAlloc
|
|--9.09%-- I830WaitLpRing
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
LKML-Reference: <1256246604-17156-2-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-23 05:23:23 +08:00
|
|
|
|
2009-09-25 00:02:49 +08:00
|
|
|
|
2010-03-31 22:33:40 +08:00
|
|
|
static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
|
2009-09-25 00:02:49 +08:00
|
|
|
{
|
|
|
|
int n;
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
va_start(ap, fmt);
|
2010-03-31 22:33:40 +08:00
|
|
|
n = vsnprintf(bf, size, fmt, ap);
|
2012-09-06 23:46:56 +08:00
|
|
|
if (symbol_conf.field_sep && n > 0) {
|
2010-03-31 22:33:40 +08:00
|
|
|
char *sep = bf;
|
|
|
|
|
|
|
|
while (1) {
|
2012-09-06 23:46:56 +08:00
|
|
|
sep = strchr(sep, *symbol_conf.field_sep);
|
2010-03-31 22:33:40 +08:00
|
|
|
if (sep == NULL)
|
|
|
|
break;
|
|
|
|
*sep = '.';
|
2009-09-25 00:02:49 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
va_end(ap);
|
perf tools: Incorrect use of snprintf results in SEGV
I have a workload where perf top scribbles over the stack and we SEGV.
What makes it interesting is that an snprintf is causing this.
The workload is a c++ gem that has method names over 3000 characters
long, but snprintf is designed to avoid overrunning buffers. So what
went wrong?
The problem is we assume snprintf returns the number of characters
written:
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level);
...
ret += repsep_snprintf(bf + ret, size - ret, "%s", self->ms.sym->name);
Unfortunately this is not how snprintf works. snprintf returns the
number of characters that would have been written if there was enough
space. In the above case, if the first snprintf returns a value larger
than size, we pass a negative size into the second snprintf and happily
scribble over the stack. If you have 3000 character c++ methods thats a
lot of stack to trample.
This patch fixes repsep_snprintf by clamping the value at size - 1 which
is the maximum snprintf can write before adding the NULL terminator.
I get the sinking feeling that there are a lot of other uses of snprintf
that have this same bug, we should audit them all.
Cc: David Ahern <dsahern@gmail.com>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Cc: stable@kernel.org
Link: http://lkml.kernel.org/r/20120307114249.44275ca3@kryten
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-03-07 08:42:49 +08:00
|
|
|
|
|
|
|
if (n >= (int)size)
|
|
|
|
return size - 1;
|
2009-09-25 00:02:49 +08:00
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2013-09-11 20:46:56 +08:00
|
|
|
/*
 * Order two pointers by NULL-ness only: 0 when both are NULL, -1 when only
 * @l is NULL, 1 otherwise.  The callers in this file invoke it only after
 * finding that at least one side is NULL.
 */
static int64_t cmp_null(const void *l, const void *r)
{
	if (!l && !r)
		return 0;

	return l ? 1 : -1;
}
|
|
|
|
|
|
|
|
/* --sort pid */
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2013-07-04 21:20:31 +08:00
|
|
|
return right->thread->tid - left->thread->tid;
|
2011-06-29 09:14:52 +08:00
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
|
2010-03-31 22:33:40 +08:00
|
|
|
size_t size, unsigned int width)
|
2009-09-25 00:02:49 +08:00
|
|
|
{
|
2013-09-11 20:46:56 +08:00
|
|
|
const char *comm = thread__comm_str(he->thread);
|
2014-07-31 13:47:38 +08:00
|
|
|
|
|
|
|
width = max(7U, width) - 6;
|
|
|
|
return repsep_snprintf(bf, size, "%5d:%-*.*s", he->thread->tid,
|
|
|
|
width, width, comm ?: "");
|
2009-09-25 00:02:49 +08:00
|
|
|
}
|
|
|
|
|
2011-06-29 09:14:52 +08:00
|
|
|
struct sort_entry sort_thread = {
|
2014-07-31 13:47:35 +08:00
|
|
|
.se_header = " Pid:Command",
|
2011-06-29 09:14:52 +08:00
|
|
|
.se_cmp = sort__thread_cmp,
|
|
|
|
.se_snprintf = hist_entry__thread_snprintf,
|
|
|
|
.se_width_idx = HISTC_THREAD,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* --sort comm */
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2013-09-11 23:18:09 +08:00
|
|
|
/* Compare the addr that should be unique among comm */
|
2015-05-15 23:54:28 +08:00
|
|
|
return strcmp(comm__str(right->comm), comm__str(left->comm));
|
2011-06-29 09:14:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2013-09-13 15:28:57 +08:00
|
|
|
/* Compare the addr that should be unique among comm */
|
2015-05-15 23:54:28 +08:00
|
|
|
return strcmp(comm__str(right->comm), comm__str(left->comm));
|
2011-06-29 09:14:52 +08:00
|
|
|
}
|
|
|
|
|
2014-03-04 10:01:41 +08:00
|
|
|
static int64_t
|
|
|
|
sort__comm_sort(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return strcmp(comm__str(right->comm), comm__str(left->comm));
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__comm_snprintf(struct hist_entry *he, char *bf,
|
2010-03-31 22:33:40 +08:00
|
|
|
size_t size, unsigned int width)
|
2009-09-25 00:02:49 +08:00
|
|
|
{
|
2014-07-31 13:47:38 +08:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, comm__str(he->comm));
|
2009-09-25 00:02:49 +08:00
|
|
|
}
|
|
|
|
|
2012-12-27 17:11:38 +08:00
|
|
|
struct sort_entry sort_comm = {
|
|
|
|
.se_header = "Command",
|
|
|
|
.se_cmp = sort__comm_cmp,
|
|
|
|
.se_collapse = sort__comm_collapse,
|
2014-03-04 10:01:41 +08:00
|
|
|
.se_sort = sort__comm_sort,
|
2012-12-27 17:11:38 +08:00
|
|
|
.se_snprintf = hist_entry__comm_snprintf,
|
|
|
|
.se_width_idx = HISTC_COMM,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* --sort dso */
|
|
|
|
|
2012-02-10 06:21:01 +08:00
|
|
|
static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
|
|
|
|
{
|
|
|
|
struct dso *dso_l = map_l ? map_l->dso : NULL;
|
|
|
|
struct dso *dso_r = map_r ? map_r->dso : NULL;
|
|
|
|
const char *dso_name_l, *dso_name_r;
|
|
|
|
|
|
|
|
if (!dso_l || !dso_r)
|
2014-03-04 10:01:41 +08:00
|
|
|
return cmp_null(dso_r, dso_l);
|
2012-02-10 06:21:01 +08:00
|
|
|
|
|
|
|
if (verbose) {
|
|
|
|
dso_name_l = dso_l->long_name;
|
|
|
|
dso_name_r = dso_r->long_name;
|
|
|
|
} else {
|
|
|
|
dso_name_l = dso_l->short_name;
|
|
|
|
dso_name_r = dso_r->short_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
return strcmp(dso_name_l, dso_name_r);
|
|
|
|
}
|
|
|
|
|
2011-06-29 09:14:52 +08:00
|
|
|
static int64_t
|
2009-09-25 00:02:49 +08:00
|
|
|
sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-03-04 10:01:41 +08:00
|
|
|
return _sort__dso_cmp(right->ms.map, left->ms.map);
|
2012-02-10 06:21:01 +08:00
|
|
|
}
|
2009-09-25 00:02:49 +08:00
|
|
|
|
2012-12-27 17:11:38 +08:00
|
|
|
static int _hist_entry__dso_snprintf(struct map *map, char *bf,
|
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
if (map && map->dso) {
|
|
|
|
const char *dso_name = !verbose ? map->dso->short_name :
|
|
|
|
map->dso->long_name;
|
2014-07-31 13:47:38 +08:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, dso_name);
|
2012-12-27 17:11:38 +08:00
|
|
|
}
|
|
|
|
|
2014-07-31 13:47:38 +08:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, "[unknown]");
|
2012-12-27 17:11:38 +08:00
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf,
|
2012-12-27 17:11:38 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2013-10-23 06:01:31 +08:00
|
|
|
return _hist_entry__dso_snprintf(he->ms.map, bf, size, width);
|
2012-12-27 17:11:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_dso = {
|
|
|
|
.se_header = "Shared Object",
|
|
|
|
.se_cmp = sort__dso_cmp,
|
|
|
|
.se_snprintf = hist_entry__dso_snprintf,
|
|
|
|
.se_width_idx = HISTC_DSO,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* --sort symbol */
|
2009-09-25 00:02:49 +08:00
|
|
|
|
2013-12-18 13:21:09 +08:00
|
|
|
/* Compare two raw addresses: the unsigned difference right - left, reinterpreted as signed. */
static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
{
	return (int64_t)(right_ip - left_ip);
}
|
|
|
|
|
2013-02-06 13:57:15 +08:00
|
|
|
static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
|
2012-02-10 06:21:01 +08:00
|
|
|
{
|
2013-02-06 13:57:15 +08:00
|
|
|
u64 ip_l, ip_r;
|
|
|
|
|
2012-02-10 06:21:01 +08:00
|
|
|
if (!sym_l || !sym_r)
|
|
|
|
return cmp_null(sym_l, sym_r);
|
|
|
|
|
|
|
|
if (sym_l == sym_r)
|
|
|
|
return 0;
|
|
|
|
|
2012-12-21 03:11:20 +08:00
|
|
|
ip_l = sym_l->start;
|
|
|
|
ip_r = sym_r->start;
|
2012-02-10 06:21:01 +08:00
|
|
|
|
|
|
|
return (int64_t)(ip_r - ip_l);
|
|
|
|
}
|
|
|
|
|
2012-12-27 17:11:38 +08:00
|
|
|
static int64_t
|
|
|
|
sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
|
2012-02-10 06:21:01 +08:00
|
|
|
{
|
2013-10-15 10:01:56 +08:00
|
|
|
int64_t ret;
|
|
|
|
|
2012-12-27 17:11:38 +08:00
|
|
|
if (!left->ms.sym && !right->ms.sym)
|
2013-12-18 13:21:09 +08:00
|
|
|
return _sort__addr_cmp(left->ip, right->ip);
|
2009-09-25 00:02:49 +08:00
|
|
|
|
2013-10-15 10:01:56 +08:00
|
|
|
/*
|
|
|
|
* comparing symbol address alone is not enough since it's a
|
|
|
|
* relative address within a dso.
|
|
|
|
*/
|
2013-12-18 13:21:10 +08:00
|
|
|
if (!sort__has_dso) {
|
|
|
|
ret = sort__dso_cmp(left, right);
|
|
|
|
if (ret != 0)
|
|
|
|
return ret;
|
|
|
|
}
|
2013-10-15 10:01:56 +08:00
|
|
|
|
2013-02-06 13:57:15 +08:00
|
|
|
return _sort__sym_cmp(left->ms.sym, right->ms.sym);
|
2012-02-10 06:21:01 +08:00
|
|
|
}
|
|
|
|
|
2014-03-04 10:01:41 +08:00
|
|
|
static int64_t
|
|
|
|
sort__sym_sort(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
if (!left->ms.sym || !right->ms.sym)
|
|
|
|
return cmp_null(left->ms.sym, right->ms.sym);
|
|
|
|
|
|
|
|
return strcmp(right->ms.sym->name, left->ms.sym->name);
|
|
|
|
}
|
|
|
|
|
2012-02-10 06:21:01 +08:00
|
|
|
static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
|
|
|
|
u64 ip, char level, char *bf, size_t size,
|
2012-12-27 17:11:39 +08:00
|
|
|
unsigned int width)
|
2012-02-10 06:21:01 +08:00
|
|
|
{
|
|
|
|
size_t ret = 0;
|
|
|
|
|
|
|
|
if (verbose) {
|
|
|
|
char o = map ? dso__symtab_origin(map->dso) : '!';
|
|
|
|
ret += repsep_snprintf(bf, size, "%-#*llx %c ",
|
2013-04-01 19:35:19 +08:00
|
|
|
BITS_PER_LONG / 4 + 2, ip, o);
|
2009-10-02 14:29:58 +08:00
|
|
|
}
|
2009-09-25 00:02:49 +08:00
|
|
|
|
2012-02-10 06:21:01 +08:00
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
|
2013-01-24 23:10:35 +08:00
|
|
|
if (sym && map) {
|
|
|
|
if (map->type == MAP__VARIABLE) {
|
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
|
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
|
2013-01-24 23:10:42 +08:00
|
|
|
ip - map->unmap_ip(map, sym->start));
|
2013-01-24 23:10:35 +08:00
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
|
|
|
|
width - ret, "");
|
|
|
|
} else {
|
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
|
|
|
|
width - ret,
|
|
|
|
sym->name);
|
|
|
|
}
|
|
|
|
} else {
|
2012-02-10 06:21:01 +08:00
|
|
|
size_t len = BITS_PER_LONG / 4;
|
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
|
|
|
|
len, ip);
|
|
|
|
ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
|
|
|
|
width - ret, "");
|
|
|
|
}
|
|
|
|
|
2014-07-31 13:47:38 +08:00
|
|
|
if (ret > width)
|
|
|
|
bf[width] = '\0';
|
|
|
|
|
|
|
|
return width;
|
2009-09-25 00:02:49 +08:00
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
|
2012-12-27 17:11:39 +08:00
|
|
|
size_t size, unsigned int width)
|
2012-02-10 06:21:01 +08:00
|
|
|
{
|
2013-10-23 06:01:31 +08:00
|
|
|
return _hist_entry__sym_snprintf(he->ms.map, he->ms.sym, he->ip,
|
|
|
|
he->level, bf, size, width);
|
2012-02-10 06:21:01 +08:00
|
|
|
}
|
2009-09-25 00:02:49 +08:00
|
|
|
|
2011-06-29 09:14:52 +08:00
|
|
|
struct sort_entry sort_sym = {
|
|
|
|
.se_header = "Symbol",
|
|
|
|
.se_cmp = sort__sym_cmp,
|
2014-03-04 10:01:41 +08:00
|
|
|
.se_sort = sort__sym_sort,
|
2011-06-29 09:14:52 +08:00
|
|
|
.se_snprintf = hist_entry__sym_snprintf,
|
|
|
|
.se_width_idx = HISTC_SYMBOL,
|
|
|
|
};
|
2009-09-25 00:02:49 +08:00
|
|
|
|
2012-05-30 21:33:24 +08:00
|
|
|
/* --sort srcline */
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2013-09-11 13:09:33 +08:00
|
|
|
if (!left->srcline) {
|
|
|
|
if (!left->ms.map)
|
|
|
|
left->srcline = SRCLINE_UNKNOWN;
|
|
|
|
else {
|
|
|
|
struct map *map = left->ms.map;
|
|
|
|
left->srcline = get_srcline(map->dso,
|
2014-11-13 10:05:27 +08:00
|
|
|
map__rip_2objdump(map, left->ip),
|
|
|
|
left->ms.sym, true);
|
2013-09-11 13:09:33 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!right->srcline) {
|
|
|
|
if (!right->ms.map)
|
|
|
|
right->srcline = SRCLINE_UNKNOWN;
|
|
|
|
else {
|
|
|
|
struct map *map = right->ms.map;
|
|
|
|
right->srcline = get_srcline(map->dso,
|
2014-11-13 10:05:27 +08:00
|
|
|
map__rip_2objdump(map, right->ip),
|
|
|
|
right->ms.sym, true);
|
2013-09-11 13:09:33 +08:00
|
|
|
}
|
|
|
|
}
|
2014-03-04 10:01:41 +08:00
|
|
|
return strcmp(right->srcline, left->srcline);
|
2012-05-30 21:33:24 +08:00
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
|
2014-07-31 13:47:38 +08:00
|
|
|
size_t size, unsigned int width)
|
2012-05-30 21:33:24 +08:00
|
|
|
{
|
2014-11-19 05:02:51 +08:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcline);
|
2012-05-30 21:33:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_srcline = {
|
|
|
|
.se_header = "Source:Line",
|
|
|
|
.se_cmp = sort__srcline_cmp,
|
|
|
|
.se_snprintf = hist_entry__srcline_snprintf,
|
|
|
|
.se_width_idx = HISTC_SRCLINE,
|
|
|
|
};
|
|
|
|
|
2009-09-25 00:02:49 +08:00
|
|
|
/* --sort parent */
|
|
|
|
|
2011-06-29 09:14:52 +08:00
|
|
|
static int64_t
|
2009-09-25 00:02:49 +08:00
|
|
|
sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
struct symbol *sym_l = left->parent;
|
|
|
|
struct symbol *sym_r = right->parent;
|
|
|
|
|
|
|
|
if (!sym_l || !sym_r)
|
|
|
|
return cmp_null(sym_l, sym_r);
|
|
|
|
|
2014-03-04 10:01:41 +08:00
|
|
|
return strcmp(sym_r->name, sym_l->name);
|
2009-09-25 00:02:49 +08:00
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__parent_snprintf(struct hist_entry *he, char *bf,
|
2010-03-31 22:33:40 +08:00
|
|
|
size_t size, unsigned int width)
|
2009-09-25 00:02:49 +08:00
|
|
|
{
|
2014-07-31 13:47:38 +08:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width,
|
2013-10-23 06:01:31 +08:00
|
|
|
he->parent ? he->parent->name : "[other]");
|
2009-09-25 00:02:49 +08:00
|
|
|
}
|
|
|
|
|
2011-06-29 09:14:52 +08:00
|
|
|
struct sort_entry sort_parent = {
|
|
|
|
.se_header = "Parent symbol",
|
|
|
|
.se_cmp = sort__parent_cmp,
|
|
|
|
.se_snprintf = hist_entry__parent_snprintf,
|
|
|
|
.se_width_idx = HISTC_PARENT,
|
|
|
|
};
|
|
|
|
|
2010-06-04 22:27:10 +08:00
|
|
|
/* --sort cpu */
|
|
|
|
|
2011-06-29 09:14:52 +08:00
|
|
|
static int64_t
|
2010-06-04 22:27:10 +08:00
|
|
|
sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return right->cpu - left->cpu;
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__cpu_snprintf(struct hist_entry *he, char *bf,
|
|
|
|
size_t size, unsigned int width)
|
2010-06-04 22:27:10 +08:00
|
|
|
{
|
2014-07-31 13:47:38 +08:00
|
|
|
return repsep_snprintf(bf, size, "%*.*d", width, width, he->cpu);
|
2010-06-04 22:27:10 +08:00
|
|
|
}
|
|
|
|
|
2011-06-29 09:14:52 +08:00
|
|
|
struct sort_entry sort_cpu = {
|
|
|
|
.se_header = "CPU",
|
|
|
|
.se_cmp = sort__cpu_cmp,
|
|
|
|
.se_snprintf = hist_entry__cpu_snprintf,
|
|
|
|
.se_width_idx = HISTC_CPU,
|
|
|
|
};
|
|
|
|
|
2012-12-27 17:11:38 +08:00
|
|
|
/* sort keys for branch stacks */
|
|
|
|
|
2012-02-10 06:21:01 +08:00
|
|
|
static int64_t
|
|
|
|
sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 22:07:07 +08:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
2012-02-10 06:21:01 +08:00
|
|
|
return _sort__dso_cmp(left->branch_info->from.map,
|
|
|
|
right->branch_info->from.map);
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
|
2012-02-10 06:21:01 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2014-10-16 22:07:07 +08:00
|
|
|
if (he->branch_info)
|
|
|
|
return _hist_entry__dso_snprintf(he->branch_info->from.map,
|
|
|
|
bf, size, width);
|
|
|
|
else
|
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
|
2012-02-10 06:21:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 22:07:06 +08:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
2012-02-10 06:21:01 +08:00
|
|
|
return _sort__dso_cmp(left->branch_info->to.map,
|
|
|
|
right->branch_info->to.map);
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
|
2012-02-10 06:21:01 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2014-10-16 22:07:06 +08:00
|
|
|
if (he->branch_info)
|
|
|
|
return _hist_entry__dso_snprintf(he->branch_info->to.map,
|
|
|
|
bf, size, width);
|
|
|
|
else
|
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
|
2012-02-10 06:21:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
struct addr_map_symbol *from_l = &left->branch_info->from;
|
|
|
|
struct addr_map_symbol *from_r = &right->branch_info->from;
|
|
|
|
|
2014-10-16 22:07:05 +08:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
|
|
|
from_l = &left->branch_info->from;
|
|
|
|
from_r = &right->branch_info->from;
|
|
|
|
|
2012-02-10 06:21:01 +08:00
|
|
|
if (!from_l->sym && !from_r->sym)
|
2013-12-18 13:21:09 +08:00
|
|
|
return _sort__addr_cmp(from_l->addr, from_r->addr);
|
2012-02-10 06:21:01 +08:00
|
|
|
|
2013-02-06 13:57:15 +08:00
|
|
|
return _sort__sym_cmp(from_l->sym, from_r->sym);
|
2012-02-10 06:21:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 22:07:04 +08:00
|
|
|
struct addr_map_symbol *to_l, *to_r;
|
|
|
|
|
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
|
|
|
to_l = &left->branch_info->to;
|
|
|
|
to_r = &right->branch_info->to;
|
2012-02-10 06:21:01 +08:00
|
|
|
|
|
|
|
if (!to_l->sym && !to_r->sym)
|
2013-12-18 13:21:09 +08:00
|
|
|
return _sort__addr_cmp(to_l->addr, to_r->addr);
|
2012-02-10 06:21:01 +08:00
|
|
|
|
2013-02-06 13:57:15 +08:00
|
|
|
return _sort__sym_cmp(to_l->sym, to_r->sym);
|
2012-02-10 06:21:01 +08:00
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
|
2012-12-27 17:11:39 +08:00
|
|
|
size_t size, unsigned int width)
|
2012-02-10 06:21:01 +08:00
|
|
|
{
|
2014-10-16 22:07:05 +08:00
|
|
|
if (he->branch_info) {
|
|
|
|
struct addr_map_symbol *from = &he->branch_info->from;
|
2012-02-10 06:21:01 +08:00
|
|
|
|
2014-10-16 22:07:05 +08:00
|
|
|
return _hist_entry__sym_snprintf(from->map, from->sym, from->addr,
|
|
|
|
he->level, bf, size, width);
|
|
|
|
}
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
|
2012-02-10 06:21:01 +08:00
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
|
2012-12-27 17:11:39 +08:00
|
|
|
size_t size, unsigned int width)
|
2012-02-10 06:21:01 +08:00
|
|
|
{
|
2014-10-16 22:07:04 +08:00
|
|
|
if (he->branch_info) {
|
|
|
|
struct addr_map_symbol *to = &he->branch_info->to;
|
|
|
|
|
|
|
|
return _hist_entry__sym_snprintf(to->map, to->sym, to->addr,
|
|
|
|
he->level, bf, size, width);
|
|
|
|
}
|
2012-02-10 06:21:01 +08:00
|
|
|
|
2014-10-16 22:07:04 +08:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
|
2012-02-10 06:21:01 +08:00
|
|
|
}
|
|
|
|
|
2012-12-27 17:11:38 +08:00
|
|
|
struct sort_entry sort_dso_from = {
|
|
|
|
.se_header = "Source Shared Object",
|
|
|
|
.se_cmp = sort__dso_from_cmp,
|
|
|
|
.se_snprintf = hist_entry__dso_from_snprintf,
|
|
|
|
.se_width_idx = HISTC_DSO_FROM,
|
|
|
|
};
|
|
|
|
|
2012-02-10 06:21:01 +08:00
|
|
|
struct sort_entry sort_dso_to = {
|
|
|
|
.se_header = "Target Shared Object",
|
|
|
|
.se_cmp = sort__dso_to_cmp,
|
|
|
|
.se_snprintf = hist_entry__dso_to_snprintf,
|
|
|
|
.se_width_idx = HISTC_DSO_TO,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_sym_from = {
|
|
|
|
.se_header = "Source Symbol",
|
|
|
|
.se_cmp = sort__sym_from_cmp,
|
|
|
|
.se_snprintf = hist_entry__sym_from_snprintf,
|
|
|
|
.se_width_idx = HISTC_SYMBOL_FROM,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_sym_to = {
|
|
|
|
.se_header = "Target Symbol",
|
|
|
|
.se_cmp = sort__sym_to_cmp,
|
|
|
|
.se_snprintf = hist_entry__sym_to_snprintf,
|
|
|
|
.se_width_idx = HISTC_SYMBOL_TO,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 22:07:03 +08:00
|
|
|
unsigned char mp, p;
|
2012-02-10 06:21:01 +08:00
|
|
|
|
2014-10-16 22:07:03 +08:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
|
|
|
mp = left->branch_info->flags.mispred != right->branch_info->flags.mispred;
|
|
|
|
p = left->branch_info->flags.predicted != right->branch_info->flags.predicted;
|
2012-02-10 06:21:01 +08:00
|
|
|
return mp || p;
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
|
2012-02-10 06:21:01 +08:00
|
|
|
size_t size, unsigned int width){
|
|
|
|
static const char *out = "N/A";
|
|
|
|
|
2014-10-16 22:07:03 +08:00
|
|
|
if (he->branch_info) {
|
|
|
|
if (he->branch_info->flags.predicted)
|
|
|
|
out = "N";
|
|
|
|
else if (he->branch_info->flags.mispred)
|
|
|
|
out = "Y";
|
|
|
|
}
|
2012-02-10 06:21:01 +08:00
|
|
|
|
2014-07-31 13:47:38 +08:00
|
|
|
return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
|
2012-02-10 06:21:01 +08:00
|
|
|
}
|
|
|
|
|
2013-01-24 23:10:35 +08:00
|
|
|
/* --sort daddr_sym */
|
|
|
|
static int64_t
|
|
|
|
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
uint64_t l = 0, r = 0;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
l = left->mem_info->daddr.addr;
|
|
|
|
if (right->mem_info)
|
|
|
|
r = right->mem_info->daddr.addr;
|
|
|
|
|
|
|
|
return (int64_t)(r - l);
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 23:10:35 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
uint64_t addr = 0;
|
|
|
|
struct map *map = NULL;
|
|
|
|
struct symbol *sym = NULL;
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
if (he->mem_info) {
|
|
|
|
addr = he->mem_info->daddr.addr;
|
|
|
|
map = he->mem_info->daddr.map;
|
|
|
|
sym = he->mem_info->daddr.sym;
|
2013-01-24 23:10:35 +08:00
|
|
|
}
|
2013-10-23 06:01:31 +08:00
|
|
|
return _hist_entry__sym_snprintf(map, sym, addr, he->level, bf, size,
|
2013-01-24 23:10:35 +08:00
|
|
|
width);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
struct map *map_l = NULL;
|
|
|
|
struct map *map_r = NULL;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
map_l = left->mem_info->daddr.map;
|
|
|
|
if (right->mem_info)
|
|
|
|
map_r = right->mem_info->daddr.map;
|
|
|
|
|
|
|
|
return _sort__dso_cmp(map_l, map_r);
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__dso_daddr_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 23:10:35 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
struct map *map = NULL;
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
if (he->mem_info)
|
|
|
|
map = he->mem_info->daddr.map;
|
2013-01-24 23:10:35 +08:00
|
|
|
|
|
|
|
return _hist_entry__dso_snprintf(map, bf, size, width);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
union perf_mem_data_src data_src_l;
|
|
|
|
union perf_mem_data_src data_src_r;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
data_src_l = left->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_l.mem_lock = PERF_MEM_LOCK_NA;
|
|
|
|
|
|
|
|
if (right->mem_info)
|
|
|
|
data_src_r = right->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_r.mem_lock = PERF_MEM_LOCK_NA;
|
|
|
|
|
|
|
|
return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock);
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 23:10:35 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
const char *out;
|
|
|
|
u64 mask = PERF_MEM_LOCK_NA;
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
if (he->mem_info)
|
|
|
|
mask = he->mem_info->data_src.mem_lock;
|
2013-01-24 23:10:35 +08:00
|
|
|
|
|
|
|
if (mask & PERF_MEM_LOCK_NA)
|
|
|
|
out = "N/A";
|
|
|
|
else if (mask & PERF_MEM_LOCK_LOCKED)
|
|
|
|
out = "Yes";
|
|
|
|
else
|
|
|
|
out = "No";
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
union perf_mem_data_src data_src_l;
|
|
|
|
union perf_mem_data_src data_src_r;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
data_src_l = left->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_l.mem_dtlb = PERF_MEM_TLB_NA;
|
|
|
|
|
|
|
|
if (right->mem_info)
|
|
|
|
data_src_r = right->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_r.mem_dtlb = PERF_MEM_TLB_NA;
|
|
|
|
|
|
|
|
return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Names for the TLB access bits, indexed by bit position in the mem_dtlb
 * mask as walked by hist_entry__tlb_snprintf().
 */
static const char * const tlb_access[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"L2",
	"Walker",
	"Fault",
};
#define NUM_TLB_ACCESS (sizeof(tlb_access)/sizeof(const char *))
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 23:10:35 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
char out[64];
|
|
|
|
size_t sz = sizeof(out) - 1; /* -1 for null termination */
|
|
|
|
size_t l = 0, i;
|
|
|
|
u64 m = PERF_MEM_TLB_NA;
|
|
|
|
u64 hit, miss;
|
|
|
|
|
|
|
|
out[0] = '\0';
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
if (he->mem_info)
|
|
|
|
m = he->mem_info->data_src.mem_dtlb;
|
2013-01-24 23:10:35 +08:00
|
|
|
|
|
|
|
hit = m & PERF_MEM_TLB_HIT;
|
|
|
|
miss = m & PERF_MEM_TLB_MISS;
|
|
|
|
|
|
|
|
/* already taken care of */
|
|
|
|
m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
|
|
|
|
|
|
|
|
for (i = 0; m && i < NUM_TLB_ACCESS; i++, m >>= 1) {
|
|
|
|
if (!(m & 0x1))
|
|
|
|
continue;
|
|
|
|
if (l) {
|
|
|
|
strcat(out, " or ");
|
|
|
|
l += 4;
|
|
|
|
}
|
|
|
|
strncat(out, tlb_access[i], sz - l);
|
|
|
|
l += strlen(tlb_access[i]);
|
|
|
|
}
|
|
|
|
if (*out == '\0')
|
|
|
|
strcpy(out, "N/A");
|
|
|
|
if (hit)
|
|
|
|
strncat(out, " hit", sz - l);
|
|
|
|
if (miss)
|
|
|
|
strncat(out, " miss", sz - l);
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
union perf_mem_data_src data_src_l;
|
|
|
|
union perf_mem_data_src data_src_r;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
data_src_l = left->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_l.mem_lvl = PERF_MEM_LVL_NA;
|
|
|
|
|
|
|
|
if (right->mem_info)
|
|
|
|
data_src_r = right->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_r.mem_lvl = PERF_MEM_LVL_NA;
|
|
|
|
|
|
|
|
return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Names for the memory hierarchy level bits, indexed by bit position in the
 * mem_lvl mask as walked by hist_entry__lvl_snprintf().
 */
static const char * const mem_lvl[] = {
	"N/A",
	"HIT",
	"MISS",
	"L1",
	"LFB",
	"L2",
	"L3",
	"Local RAM",
	"Remote RAM (1 hop)",
	"Remote RAM (2 hops)",
	"Remote Cache (1 hop)",
	"Remote Cache (2 hops)",
	"I/O",
	"Uncached",
};
#define NUM_MEM_LVL (sizeof(mem_lvl)/sizeof(const char *))
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 23:10:35 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
char out[64];
|
|
|
|
size_t sz = sizeof(out) - 1; /* -1 for null termination */
|
|
|
|
size_t i, l = 0;
|
|
|
|
u64 m = PERF_MEM_LVL_NA;
|
|
|
|
u64 hit, miss;
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
if (he->mem_info)
|
|
|
|
m = he->mem_info->data_src.mem_lvl;
|
2013-01-24 23:10:35 +08:00
|
|
|
|
|
|
|
out[0] = '\0';
|
|
|
|
|
|
|
|
hit = m & PERF_MEM_LVL_HIT;
|
|
|
|
miss = m & PERF_MEM_LVL_MISS;
|
|
|
|
|
|
|
|
/* already taken care of */
|
|
|
|
m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
|
|
|
|
|
|
|
|
for (i = 0; m && i < NUM_MEM_LVL; i++, m >>= 1) {
|
|
|
|
if (!(m & 0x1))
|
|
|
|
continue;
|
|
|
|
if (l) {
|
|
|
|
strcat(out, " or ");
|
|
|
|
l += 4;
|
|
|
|
}
|
|
|
|
strncat(out, mem_lvl[i], sz - l);
|
|
|
|
l += strlen(mem_lvl[i]);
|
|
|
|
}
|
|
|
|
if (*out == '\0')
|
|
|
|
strcpy(out, "N/A");
|
|
|
|
if (hit)
|
|
|
|
strncat(out, " hit", sz - l);
|
|
|
|
if (miss)
|
|
|
|
strncat(out, " miss", sz - l);
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
union perf_mem_data_src data_src_l;
|
|
|
|
union perf_mem_data_src data_src_r;
|
|
|
|
|
|
|
|
if (left->mem_info)
|
|
|
|
data_src_l = left->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_l.mem_snoop = PERF_MEM_SNOOP_NA;
|
|
|
|
|
|
|
|
if (right->mem_info)
|
|
|
|
data_src_r = right->mem_info->data_src;
|
|
|
|
else
|
|
|
|
data_src_r.mem_snoop = PERF_MEM_SNOOP_NA;
|
|
|
|
|
|
|
|
return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Printable names for the bits of data_src.mem_snoop.  The array index is
 * the bit position, as used by hist_entry__snoop_snprintf().
 */
static const char * const snoop_access[] = {
	"N/A",
	"None",
	"Miss",
	"Hit",
	"HitM",
};
/* Number of entries in snoop_access[]. */
#define NUM_SNOOP_ACCESS (sizeof(snoop_access) / sizeof(snoop_access[0]))
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 23:10:35 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
char out[64];
|
|
|
|
size_t sz = sizeof(out) - 1; /* -1 for null termination */
|
|
|
|
size_t i, l = 0;
|
|
|
|
u64 m = PERF_MEM_SNOOP_NA;
|
|
|
|
|
|
|
|
out[0] = '\0';
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
if (he->mem_info)
|
|
|
|
m = he->mem_info->data_src.mem_snoop;
|
2013-01-24 23:10:35 +08:00
|
|
|
|
|
|
|
for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) {
|
|
|
|
if (!(m & 0x1))
|
|
|
|
continue;
|
|
|
|
if (l) {
|
|
|
|
strcat(out, " or ");
|
|
|
|
l += 4;
|
|
|
|
}
|
|
|
|
strncat(out, snoop_access[i], sz - l);
|
|
|
|
l += strlen(snoop_access[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*out == '\0')
|
|
|
|
strcpy(out, "N/A");
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
2014-06-01 21:38:29 +08:00
|
|
|
/* Return @address with the bits selected by (cacheline_size - 1) cleared. */
static inline u64 cl_address(u64 address)
{
	/* return the cacheline of the address */
	return address & ~(cacheline_size - 1);
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
u64 l, r;
|
|
|
|
struct map *l_map, *r_map;
|
|
|
|
|
|
|
|
if (!left->mem_info) return -1;
|
|
|
|
if (!right->mem_info) return 1;
|
|
|
|
|
|
|
|
/* group event types together */
|
|
|
|
if (left->cpumode > right->cpumode) return -1;
|
|
|
|
if (left->cpumode < right->cpumode) return 1;
|
|
|
|
|
|
|
|
l_map = left->mem_info->daddr.map;
|
|
|
|
r_map = right->mem_info->daddr.map;
|
|
|
|
|
|
|
|
/* if both are NULL, jump to sort on al_addr instead */
|
|
|
|
if (!l_map && !r_map)
|
|
|
|
goto addr;
|
|
|
|
|
|
|
|
if (!l_map) return -1;
|
|
|
|
if (!r_map) return 1;
|
|
|
|
|
|
|
|
if (l_map->maj > r_map->maj) return -1;
|
|
|
|
if (l_map->maj < r_map->maj) return 1;
|
|
|
|
|
|
|
|
if (l_map->min > r_map->min) return -1;
|
|
|
|
if (l_map->min < r_map->min) return 1;
|
|
|
|
|
|
|
|
if (l_map->ino > r_map->ino) return -1;
|
|
|
|
if (l_map->ino < r_map->ino) return 1;
|
|
|
|
|
|
|
|
if (l_map->ino_generation > r_map->ino_generation) return -1;
|
|
|
|
if (l_map->ino_generation < r_map->ino_generation) return 1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Addresses with no major/minor numbers are assumed to be
|
|
|
|
* anonymous in userspace. Sort those on pid then address.
|
|
|
|
*
|
|
|
|
* The kernel and non-zero major/minor mapped areas are
|
|
|
|
* assumed to be unity mapped. Sort those on address.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if ((left->cpumode != PERF_RECORD_MISC_KERNEL) &&
|
|
|
|
(!(l_map->flags & MAP_SHARED)) &&
|
|
|
|
!l_map->maj && !l_map->min && !l_map->ino &&
|
|
|
|
!l_map->ino_generation) {
|
|
|
|
/* userspace anonymous */
|
|
|
|
|
|
|
|
if (left->thread->pid_ > right->thread->pid_) return -1;
|
|
|
|
if (left->thread->pid_ < right->thread->pid_) return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
addr:
|
|
|
|
/* al_addr does all the right addr - start + offset calculations */
|
|
|
|
l = cl_address(left->mem_info->daddr.al_addr);
|
|
|
|
r = cl_address(right->mem_info->daddr.al_addr);
|
|
|
|
|
|
|
|
if (l > r) return -1;
|
|
|
|
if (l < r) return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
|
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
|
|
|
|
|
|
|
uint64_t addr = 0;
|
|
|
|
struct map *map = NULL;
|
|
|
|
struct symbol *sym = NULL;
|
|
|
|
char level = he->level;
|
|
|
|
|
|
|
|
if (he->mem_info) {
|
|
|
|
addr = cl_address(he->mem_info->daddr.al_addr);
|
|
|
|
map = he->mem_info->daddr.map;
|
|
|
|
sym = he->mem_info->daddr.sym;
|
|
|
|
|
|
|
|
/* print [s] for shared data mmaps */
|
|
|
|
if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
|
|
|
|
map && (map->type == MAP__VARIABLE) &&
|
|
|
|
(map->flags & MAP_SHARED) &&
|
|
|
|
(map->maj || map->min || map->ino ||
|
|
|
|
map->ino_generation))
|
|
|
|
level = 's';
|
|
|
|
else if (!map)
|
|
|
|
level = 'X';
|
|
|
|
}
|
|
|
|
return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size,
|
|
|
|
width);
|
|
|
|
}
|
|
|
|
|
2012-02-10 06:21:01 +08:00
|
|
|
struct sort_entry sort_mispredict = {
|
|
|
|
.se_header = "Branch Mispredicted",
|
|
|
|
.se_cmp = sort__mispredict_cmp,
|
|
|
|
.se_snprintf = hist_entry__mispredict_snprintf,
|
|
|
|
.se_width_idx = HISTC_MISPREDICT,
|
|
|
|
};
|
|
|
|
|
2013-01-24 23:10:29 +08:00
|
|
|
static u64 he_weight(struct hist_entry *he)
|
|
|
|
{
|
|
|
|
return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return he_weight(left) - he_weight(right);
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
/* Print the per-event average weight of @he, left-aligned in @width columns. */
static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
					     size_t size, unsigned int width)
{
	return repsep_snprintf(bf, size, "%-*llu", width,
			       he_weight(he));
}
|
|
|
|
|
|
|
|
struct sort_entry sort_local_weight = {
|
|
|
|
.se_header = "Local Weight",
|
|
|
|
.se_cmp = sort__local_weight_cmp,
|
|
|
|
.se_snprintf = hist_entry__local_weight_snprintf,
|
|
|
|
.se_width_idx = HISTC_LOCAL_WEIGHT,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return left->stat.weight - right->stat.weight;
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
|
2013-01-24 23:10:29 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2013-10-23 06:01:31 +08:00
|
|
|
return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight);
|
2013-01-24 23:10:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_global_weight = {
|
|
|
|
.se_header = "Weight",
|
|
|
|
.se_cmp = sort__global_weight_cmp,
|
|
|
|
.se_snprintf = hist_entry__global_weight_snprintf,
|
|
|
|
.se_width_idx = HISTC_GLOBAL_WEIGHT,
|
|
|
|
};
|
|
|
|
|
2013-01-24 23:10:35 +08:00
|
|
|
struct sort_entry sort_mem_daddr_sym = {
|
|
|
|
.se_header = "Data Symbol",
|
|
|
|
.se_cmp = sort__daddr_cmp,
|
|
|
|
.se_snprintf = hist_entry__daddr_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_DADDR_SYMBOL,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_mem_daddr_dso = {
|
|
|
|
.se_header = "Data Object",
|
|
|
|
.se_cmp = sort__dso_daddr_cmp,
|
|
|
|
.se_snprintf = hist_entry__dso_daddr_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_DADDR_SYMBOL,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_mem_locked = {
|
|
|
|
.se_header = "Locked",
|
|
|
|
.se_cmp = sort__locked_cmp,
|
|
|
|
.se_snprintf = hist_entry__locked_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_LOCKED,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_mem_tlb = {
|
|
|
|
.se_header = "TLB access",
|
|
|
|
.se_cmp = sort__tlb_cmp,
|
|
|
|
.se_snprintf = hist_entry__tlb_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_TLB,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_mem_lvl = {
|
|
|
|
.se_header = "Memory access",
|
|
|
|
.se_cmp = sort__lvl_cmp,
|
|
|
|
.se_snprintf = hist_entry__lvl_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_LVL,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct sort_entry sort_mem_snoop = {
|
|
|
|
.se_header = "Snoop",
|
|
|
|
.se_cmp = sort__snoop_cmp,
|
|
|
|
.se_snprintf = hist_entry__snoop_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_SNOOP,
|
|
|
|
};
|
|
|
|
|
2014-06-01 21:38:29 +08:00
|
|
|
struct sort_entry sort_mem_dcacheline = {
|
|
|
|
.se_header = "Data Cacheline",
|
|
|
|
.se_cmp = sort__dcacheline_cmp,
|
|
|
|
.se_snprintf = hist_entry__dcacheline_snprintf,
|
|
|
|
.se_width_idx = HISTC_MEM_DCACHELINE,
|
|
|
|
};
|
|
|
|
|
2013-09-20 22:40:41 +08:00
|
|
|
static int64_t
|
|
|
|
sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 22:07:01 +08:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
2013-09-20 22:40:41 +08:00
|
|
|
return left->branch_info->flags.abort !=
|
|
|
|
right->branch_info->flags.abort;
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf,
|
2013-09-20 22:40:41 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2014-10-16 22:07:01 +08:00
|
|
|
static const char *out = "N/A";
|
|
|
|
|
|
|
|
if (he->branch_info) {
|
|
|
|
if (he->branch_info->flags.abort)
|
|
|
|
out = "A";
|
|
|
|
else
|
|
|
|
out = ".";
|
|
|
|
}
|
2013-09-20 22:40:41 +08:00
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_abort = {
|
|
|
|
.se_header = "Transaction abort",
|
|
|
|
.se_cmp = sort__abort_cmp,
|
|
|
|
.se_snprintf = hist_entry__abort_snprintf,
|
|
|
|
.se_width_idx = HISTC_ABORT,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int64_t
|
|
|
|
sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
2014-10-16 22:07:02 +08:00
|
|
|
if (!left->branch_info || !right->branch_info)
|
|
|
|
return cmp_null(left->branch_info, right->branch_info);
|
|
|
|
|
2013-09-20 22:40:41 +08:00
|
|
|
return left->branch_info->flags.in_tx !=
|
|
|
|
right->branch_info->flags.in_tx;
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf,
|
2013-09-20 22:40:41 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2014-10-16 22:07:02 +08:00
|
|
|
static const char *out = "N/A";
|
2013-09-20 22:40:41 +08:00
|
|
|
|
2014-10-16 22:07:02 +08:00
|
|
|
if (he->branch_info) {
|
|
|
|
if (he->branch_info->flags.in_tx)
|
|
|
|
out = "T";
|
|
|
|
else
|
|
|
|
out = ".";
|
|
|
|
}
|
2013-09-20 22:40:41 +08:00
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, out);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_in_tx = {
|
|
|
|
.se_header = "Branch in transaction",
|
|
|
|
.se_cmp = sort__in_tx_cmp,
|
|
|
|
.se_snprintf = hist_entry__in_tx_snprintf,
|
|
|
|
.se_width_idx = HISTC_IN_TX,
|
|
|
|
};
|
|
|
|
|
2013-09-20 22:40:43 +08:00
|
|
|
static int64_t
|
|
|
|
sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right)
|
|
|
|
{
|
|
|
|
return left->transaction - right->transaction;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Copy @str (including its terminating NUL) to @p and return a pointer to
 * the NUL just written, so calls can be chained to build up a string.
 */
static inline char *add_str(char *p, const char *str)
{
	size_t n = strlen(str);

	memcpy(p, str, n + 1);
	return p + n;
}
|
|
|
|
|
|
|
|
static struct txbit {
|
|
|
|
unsigned flag;
|
|
|
|
const char *name;
|
|
|
|
int skip_for_len;
|
|
|
|
} txbits[] = {
|
|
|
|
{ PERF_TXN_ELISION, "EL ", 0 },
|
|
|
|
{ PERF_TXN_TRANSACTION, "TX ", 1 },
|
|
|
|
{ PERF_TXN_SYNC, "SYNC ", 1 },
|
|
|
|
{ PERF_TXN_ASYNC, "ASYNC ", 0 },
|
|
|
|
{ PERF_TXN_RETRY, "RETRY ", 0 },
|
|
|
|
{ PERF_TXN_CONFLICT, "CON ", 0 },
|
|
|
|
{ PERF_TXN_CAPACITY_WRITE, "CAP-WRITE ", 1 },
|
|
|
|
{ PERF_TXN_CAPACITY_READ, "CAP-READ ", 0 },
|
|
|
|
{ 0, NULL, 0 }
|
|
|
|
};
|
|
|
|
|
|
|
|
int hist_entry__transaction_len(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int len = 0;
|
|
|
|
|
|
|
|
for (i = 0; txbits[i].name; i++) {
|
|
|
|
if (!txbits[i].skip_for_len)
|
|
|
|
len += strlen(txbits[i].name);
|
|
|
|
}
|
|
|
|
len += 4; /* :XX<space> */
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2013-10-23 06:01:31 +08:00
|
|
|
static int hist_entry__transaction_snprintf(struct hist_entry *he, char *bf,
|
2013-09-20 22:40:43 +08:00
|
|
|
size_t size, unsigned int width)
|
|
|
|
{
|
2013-10-23 06:01:31 +08:00
|
|
|
u64 t = he->transaction;
|
2013-09-20 22:40:43 +08:00
|
|
|
char buf[128];
|
|
|
|
char *p = buf;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
buf[0] = 0;
|
|
|
|
for (i = 0; txbits[i].name; i++)
|
|
|
|
if (txbits[i].flag & t)
|
|
|
|
p = add_str(p, txbits[i].name);
|
|
|
|
if (t && !(t & (PERF_TXN_SYNC|PERF_TXN_ASYNC)))
|
|
|
|
p = add_str(p, "NEITHER ");
|
|
|
|
if (t & PERF_TXN_ABORT_MASK) {
|
|
|
|
sprintf(p, ":%" PRIx64,
|
|
|
|
(t & PERF_TXN_ABORT_MASK) >>
|
|
|
|
PERF_TXN_ABORT_SHIFT);
|
|
|
|
p += strlen(p);
|
|
|
|
}
|
|
|
|
|
|
|
|
return repsep_snprintf(bf, size, "%-*s", width, buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct sort_entry sort_transaction = {
|
|
|
|
.se_header = "Transaction ",
|
|
|
|
.se_cmp = sort__transaction_cmp,
|
|
|
|
.se_snprintf = hist_entry__transaction_snprintf,
|
|
|
|
.se_width_idx = HISTC_TRANSACTION,
|
|
|
|
};
|
|
|
|
|
2011-06-29 09:14:52 +08:00
|
|
|
/* One sort key that can be selected by name. */
struct sort_dimension {
	const char		*name;	/* key name matched against the user's token */
	struct sort_entry	*entry;	/* comparison/printing callbacks for the key */
	int			taken;	/* set to 1 once the key has been registered */
};
|
|
|
|
|
2012-02-10 06:21:01 +08:00
|
|
|
#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
|
|
|
|
|
2012-12-27 17:11:46 +08:00
|
|
|
static struct sort_dimension common_sort_dimensions[] = {
|
2012-02-10 06:21:01 +08:00
|
|
|
DIM(SORT_PID, "pid", sort_thread),
|
|
|
|
DIM(SORT_COMM, "comm", sort_comm),
|
|
|
|
DIM(SORT_DSO, "dso", sort_dso),
|
|
|
|
DIM(SORT_SYM, "symbol", sort_sym),
|
|
|
|
DIM(SORT_PARENT, "parent", sort_parent),
|
|
|
|
DIM(SORT_CPU, "cpu", sort_cpu),
|
2012-05-30 21:33:24 +08:00
|
|
|
DIM(SORT_SRCLINE, "srcline", sort_srcline),
|
2013-07-19 06:58:53 +08:00
|
|
|
DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
|
|
|
|
DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
|
2013-09-20 22:40:43 +08:00
|
|
|
DIM(SORT_TRANSACTION, "transaction", sort_transaction),
|
2011-06-29 09:14:52 +08:00
|
|
|
};
|
|
|
|
|
2012-12-27 17:11:46 +08:00
|
|
|
#undef DIM
|
|
|
|
|
|
|
|
#define DIM(d, n, func) [d - __SORT_BRANCH_STACK] = { .name = n, .entry = &(func) }
|
|
|
|
|
|
|
|
static struct sort_dimension bstack_sort_dimensions[] = {
|
|
|
|
DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
|
|
|
|
DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
|
|
|
|
DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
|
|
|
|
DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
|
|
|
|
DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
|
2013-09-20 22:40:41 +08:00
|
|
|
DIM(SORT_IN_TX, "in_tx", sort_in_tx),
|
|
|
|
DIM(SORT_ABORT, "abort", sort_abort),
|
2012-12-27 17:11:46 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
#undef DIM
|
|
|
|
|
2013-04-03 20:26:11 +08:00
|
|
|
#define DIM(d, n, func) [d - __SORT_MEMORY_MODE] = { .name = n, .entry = &(func) }
|
|
|
|
|
|
|
|
static struct sort_dimension memory_sort_dimensions[] = {
|
|
|
|
DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym),
|
|
|
|
DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso),
|
|
|
|
DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked),
|
|
|
|
DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb),
|
|
|
|
DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
|
|
|
|
DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
|
2014-06-01 21:38:29 +08:00
|
|
|
DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
|
2013-04-03 20:26:11 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
#undef DIM
|
|
|
|
|
2014-03-04 08:06:42 +08:00
|
|
|
/* One selectable output column backed by a perf_hpp__format[] entry. */
struct hpp_dimension {
	const char		*name;	/* column name */
	struct perf_hpp_fmt	*fmt;	/* format the column is rendered with */
	int			taken;	/* set to 1 once the column has been registered */
};
|
|
|
|
|
|
|
|
#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
|
|
|
|
|
|
|
|
static struct hpp_dimension hpp_sort_dimensions[] = {
|
|
|
|
DIM(PERF_HPP__OVERHEAD, "overhead"),
|
|
|
|
DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"),
|
|
|
|
DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
|
|
|
|
DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
|
|
|
|
DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
|
2013-10-30 15:06:59 +08:00
|
|
|
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
|
2014-03-04 08:06:42 +08:00
|
|
|
DIM(PERF_HPP__SAMPLES, "sample"),
|
|
|
|
DIM(PERF_HPP__PERIOD, "period"),
|
|
|
|
};
|
|
|
|
|
|
|
|
#undef DIM
|
|
|
|
|
2014-03-03 10:46:55 +08:00
|
|
|
struct hpp_sort_entry {
|
|
|
|
struct perf_hpp_fmt hpp;
|
|
|
|
struct sort_entry *se;
|
|
|
|
};
|
|
|
|
|
2014-03-04 09:46:34 +08:00
|
|
|
bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse_a;
|
|
|
|
struct hpp_sort_entry *hse_b;
|
|
|
|
|
|
|
|
if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
hse_a = container_of(a, struct hpp_sort_entry, hpp);
|
|
|
|
hse_b = container_of(b, struct hpp_sort_entry, hpp);
|
|
|
|
|
|
|
|
return hse_a->se == hse_b->se;
|
|
|
|
}
|
|
|
|
|
2014-07-31 13:47:37 +08:00
|
|
|
void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists)
|
2014-03-20 10:18:54 +08:00
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
|
|
|
|
|
|
|
if (!perf_hpp__is_sort_entry(fmt))
|
|
|
|
return;
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
2014-07-31 13:47:40 +08:00
|
|
|
hists__new_col_len(hists, hse->se->se_width_idx, strlen(fmt->name));
|
2014-03-20 10:18:54 +08:00
|
|
|
}
|
|
|
|
|
2014-03-03 10:46:55 +08:00
|
|
|
static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
|
|
|
|
struct perf_evsel *evsel)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
2014-07-31 13:47:38 +08:00
|
|
|
size_t len = fmt->user_len;
|
2014-03-03 10:46:55 +08:00
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
|
2014-07-31 13:47:38 +08:00
|
|
|
if (!len)
|
2014-10-10 00:13:41 +08:00
|
|
|
len = hists__col_len(evsel__hists(evsel), hse->se->se_width_idx);
|
2014-07-31 13:47:38 +08:00
|
|
|
|
2014-07-31 13:47:40 +08:00
|
|
|
return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name);
|
2014-03-03 10:46:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int __sort__hpp_width(struct perf_hpp_fmt *fmt,
|
|
|
|
struct perf_hpp *hpp __maybe_unused,
|
|
|
|
struct perf_evsel *evsel)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
2014-07-31 13:47:38 +08:00
|
|
|
size_t len = fmt->user_len;
|
2014-03-03 10:46:55 +08:00
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
|
2014-07-31 13:47:38 +08:00
|
|
|
if (!len)
|
2014-10-10 00:13:41 +08:00
|
|
|
len = hists__col_len(evsel__hists(evsel), hse->se->se_width_idx);
|
2014-07-31 13:47:38 +08:00
|
|
|
|
|
|
|
return len;
|
2014-03-03 10:46:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
|
|
|
|
struct hist_entry *he)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
2014-07-31 13:47:38 +08:00
|
|
|
size_t len = fmt->user_len;
|
2014-03-03 10:46:55 +08:00
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
2014-07-31 13:47:38 +08:00
|
|
|
|
|
|
|
if (!len)
|
|
|
|
len = hists__col_len(he->hists, hse->se->se_width_idx);
|
2014-03-03 10:46:55 +08:00
|
|
|
|
|
|
|
return hse->se->se_snprintf(he, hpp->buf, hpp->size, len);
|
|
|
|
}
|
|
|
|
|
2015-01-08 08:45:46 +08:00
|
|
|
static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt,
|
|
|
|
struct hist_entry *a, struct hist_entry *b)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
return hse->se->se_cmp(a, b);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt,
|
|
|
|
struct hist_entry *a, struct hist_entry *b)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
|
|
|
int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *);
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
collapse_fn = hse->se->se_collapse ?: hse->se->se_cmp;
|
|
|
|
return collapse_fn(a, b);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt,
|
|
|
|
struct hist_entry *a, struct hist_entry *b)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
|
|
|
int64_t (*sort_fn)(struct hist_entry *, struct hist_entry *);
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
sort_fn = hse->se->se_sort ?: hse->se->se_cmp;
|
|
|
|
return sort_fn(a, b);
|
|
|
|
}
|
|
|
|
|
2014-03-04 09:46:34 +08:00
|
|
|
static struct hpp_sort_entry *
|
|
|
|
__sort_dimension__alloc_hpp(struct sort_dimension *sd)
|
2014-03-03 10:46:55 +08:00
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse;
|
|
|
|
|
|
|
|
hse = malloc(sizeof(*hse));
|
|
|
|
if (hse == NULL) {
|
|
|
|
pr_err("Memory allocation failed\n");
|
2014-03-04 09:46:34 +08:00
|
|
|
return NULL;
|
2014-03-03 10:46:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
hse->se = sd->entry;
|
2014-07-31 13:47:40 +08:00
|
|
|
hse->hpp.name = sd->entry->se_header;
|
2014-03-03 10:46:55 +08:00
|
|
|
hse->hpp.header = __sort__hpp_header;
|
|
|
|
hse->hpp.width = __sort__hpp_width;
|
|
|
|
hse->hpp.entry = __sort__hpp_entry;
|
|
|
|
hse->hpp.color = NULL;
|
|
|
|
|
2015-01-08 08:45:46 +08:00
|
|
|
hse->hpp.cmp = __sort__hpp_cmp;
|
|
|
|
hse->hpp.collapse = __sort__hpp_collapse;
|
|
|
|
hse->hpp.sort = __sort__hpp_sort;
|
2014-03-03 10:46:55 +08:00
|
|
|
|
|
|
|
INIT_LIST_HEAD(&hse->hpp.list);
|
|
|
|
INIT_LIST_HEAD(&hse->hpp.sort_list);
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 23:15:47 +08:00
|
|
|
hse->hpp.elide = false;
|
2014-07-31 13:47:37 +08:00
|
|
|
hse->hpp.len = 0;
|
2014-07-31 13:47:38 +08:00
|
|
|
hse->hpp.user_len = 0;
|
2014-03-03 10:46:55 +08:00
|
|
|
|
2014-03-04 09:46:34 +08:00
|
|
|
return hse;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
|
|
|
|
{
|
|
|
|
return format->header == __sort__hpp_header;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
|
|
|
|
|
|
|
|
if (hse == NULL)
|
|
|
|
return -1;
|
|
|
|
|
2014-03-03 10:46:55 +08:00
|
|
|
perf_hpp__register_sort_field(&hse->hpp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-03-04 09:46:34 +08:00
|
|
|
static int __sort_dimension__add_hpp_output(struct sort_dimension *sd)
|
|
|
|
{
|
|
|
|
struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
|
|
|
|
|
|
|
|
if (hse == NULL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
perf_hpp__column_register(&hse->hpp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-05-19 13:19:30 +08:00
|
|
|
static int __sort_dimension__add(struct sort_dimension *sd)
|
2013-04-03 20:26:10 +08:00
|
|
|
{
|
|
|
|
if (sd->taken)
|
2014-03-03 10:46:55 +08:00
|
|
|
return 0;
|
|
|
|
|
2014-03-04 09:46:34 +08:00
|
|
|
if (__sort_dimension__add_hpp_sort(sd) < 0)
|
2014-03-03 10:46:55 +08:00
|
|
|
return -1;
|
2013-04-03 20:26:10 +08:00
|
|
|
|
|
|
|
if (sd->entry->se_collapse)
|
|
|
|
sort__need_collapse = 1;
|
|
|
|
|
|
|
|
sd->taken = 1;
|
2014-03-03 10:46:55 +08:00
|
|
|
|
|
|
|
return 0;
|
2013-04-03 20:26:10 +08:00
|
|
|
}
|
|
|
|
|
2014-03-04 08:06:42 +08:00
|
|
|
static int __hpp_dimension__add(struct hpp_dimension *hd)
|
|
|
|
{
|
|
|
|
if (!hd->taken) {
|
|
|
|
hd->taken = 1;
|
|
|
|
|
|
|
|
perf_hpp__register_sort_field(hd->fmt);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-03-04 09:46:34 +08:00
|
|
|
static int __sort_dimension__add_output(struct sort_dimension *sd)
|
|
|
|
{
|
|
|
|
if (sd->taken)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (__sort_dimension__add_hpp_output(sd) < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
sd->taken = 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int __hpp_dimension__add_output(struct hpp_dimension *hd)
|
|
|
|
{
|
|
|
|
if (!hd->taken) {
|
|
|
|
hd->taken = 1;
|
|
|
|
|
|
|
|
perf_hpp__column_register(hd->fmt);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-09-25 00:02:49 +08:00
|
|
|
int sort_dimension__add(const char *tok)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
2012-12-27 17:11:46 +08:00
|
|
|
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &common_sort_dimensions[i];
|
2009-09-25 00:02:49 +08:00
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
2012-12-27 17:11:46 +08:00
|
|
|
|
2009-09-25 00:02:49 +08:00
|
|
|
if (sd->entry == &sort_parent) {
|
|
|
|
int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
|
|
|
|
if (ret) {
|
|
|
|
char err[BUFSIZ];
|
|
|
|
|
|
|
|
regerror(ret, &parent_regex, err, sizeof(err));
|
2010-04-02 23:30:57 +08:00
|
|
|
pr_err("Invalid regex: %s\n%s", parent_pattern, err);
|
|
|
|
return -EINVAL;
|
2009-09-25 00:02:49 +08:00
|
|
|
}
|
|
|
|
sort__has_parent = 1;
|
2013-04-05 09:26:36 +08:00
|
|
|
} else if (sd->entry == &sort_sym) {
|
2012-09-14 16:35:27 +08:00
|
|
|
sort__has_sym = 1;
|
perf diff: Support for different binaries
Currently, the perf diff only works with same binaries. That's because
it compares the symbol start address. It doesn't work if the perf.data
comes from different binaries. This patch matches the symbol names.
Actually, perf diff once intended to compare the symbol names. The
commit as below can look for a pair by name.
604c5c92972d (perf diff: Change the default sort order to "dso,symbol")
However, at that time, perf diff used a global list of dsos. That means
the binaries which have the same name can only be loaded once. That's a
problem for comparing different binaries.
For example, we have an old binary and an updated binary. They very
likely have same name and most of the functions, so only dsos from old
binary will be loaded. When processing the data from updated binary,
perf still use the symbol information from old binary. That's wrong.
Then the commit as below used IP to replace symbol name.
9c443dfdd31e ("perf diff: Fix support for all --sort combinations")
From that time, perf diff starts to compare the symbol address.
The global dsos is discarded from a patch in 2010.
a1645ce12adb ("perf: 'perf kvm' tool for monitoring guest performance
from host")
However, at that time, perf diff already compared by address. So perf
diff cannot work for different binaries as well.
This patch actually rolls back the perf diff to original design. The
document is also changed, so everybody knows the original design is to
compare the symbol names.
Here are some examples:
The only difference between example_v1.c and example_v2.c is the
location of f2 and f3. There is no change in behavior, but the previous
perf diff display the wrong differential profile.
example_v1.c
noinline void f3(void)
{
volatile int i;
for (i = 0; i < 10000;) {
if(i%2)
i++;
else
i++;
}
}
noinline void f2(void)
{
volatile int a = 100, b, c;
for (b = 0; b < 10000; b++)
c = a * b;
}
noinline void f1(void)
{
f2();
f3();
}
int main()
{
int i;
for (i = 0; i < 100000; i++)
f1();
}
example_v2.c
noinline void f2(void)
{
volatile int a = 100, b, c;
for (b = 0; b < 10000; b++)
c = a * b;
}
noinline void f3(void)
{
volatile int i;
for (i = 0; i < 10000;) {
if(i%2)
i++;
else
i++;
}
}
noinline void f1(void)
{
f2();
f3();
}
int main()
{
int i;
for (i = 0; i < 100000; i++)
f1();
}
[lk@localhost perf_diff]$ gcc example_v1.c -o example
[lk@localhost perf_diff]$ perf record -o example_v1.data ./example
[ perf record: Woken up 4 times to write data ]
[ perf record: Captured and wrote 0.813 MB example_v1.data (~35522 samples) ]
[lk@localhost perf_diff]$ gcc example_v2.c -o example
[lk@localhost perf_diff]$ perf record -o example_v2.data ./example
[ perf record: Woken up 4 times to write data ]
[ perf record: Captured and wrote 0.824 MB example_v2.data (~36015 samples) ]
Old perf diff result:
[lk@localhost perf_diff]$ perf diff example_v1.data example_v2.data
Event 'cycles'
Baseline Delta Shared Object Symbol
........ ....... ................ ...............................
[kernel.vmlinux] [k] __perf_event_task_sched_out
0.00% [kernel.vmlinux] [k] apic_timer_interrupt
[kernel.vmlinux] [k] idle_cpu
[kernel.vmlinux] [k] intel_pstate_timer_func
[kernel.vmlinux] [k] native_read_msr_safe
0.00% [kernel.vmlinux] [k] native_read_tsc
0.00% [kernel.vmlinux] [k] native_write_msr_safe
[kernel.vmlinux] [k] ntp_tick_length
0.00% [kernel.vmlinux] [k] rb_erase
0.00% [kernel.vmlinux] [k] tick_sched_timer
0.00% [kernel.vmlinux] [k] unmap_single_vma
0.00% [kernel.vmlinux] [k] update_wall_time
0.00% example [.] f1
46.24% example [.] f2
53.71% -7.55% example [.] f3
+53.81% example [.] f3
0.02% example [.] main
New perf diff result:
[lk@localhost perf_diff]$ perf diff example_v1.data example_v2.data
[kernel.vmlinux] [k] __perf_event_task_sched_out
0.00% [kernel.vmlinux] [k] apic_timer_interrupt
[kernel.vmlinux] [k] idle_cpu
[kernel.vmlinux] [k] intel_pstate_timer_func
[kernel.vmlinux] [k] native_read_msr_safe
0.00% [kernel.vmlinux] [k] native_read_tsc
0.00% [kernel.vmlinux] [k] native_write_msr_safe
[kernel.vmlinux] [k] ntp_tick_length
0.00% [kernel.vmlinux] [k] rb_erase
0.00% [kernel.vmlinux] [k] tick_sched_timer
0.00% [kernel.vmlinux] [k] unmap_single_vma
0.00% [kernel.vmlinux] [k] update_wall_time
0.00% example [.] f1
46.24% -0.08% example [.] f2
53.71% +0.11% example [.] f3
0.02% example [.] main
Signed-off-by: Kan Liang <kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1423460384-11645-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-02-09 13:39:44 +08:00
|
|
|
/*
|
|
|
|
* perf diff displays the performance difference amongst
|
|
|
|
* two or more perf.data files. Those files could come
|
|
|
|
* from different binaries. So we should not compare
|
|
|
|
* their ips, but the name of symbol.
|
|
|
|
*/
|
|
|
|
if (sort__mode == SORT_MODE__DIFF)
|
|
|
|
sd->entry->se_collapse = sort__sym_sort;
|
|
|
|
|
2013-12-18 13:21:10 +08:00
|
|
|
} else if (sd->entry == &sort_dso) {
|
|
|
|
sort__has_dso = 1;
|
2009-09-25 00:02:49 +08:00
|
|
|
}
|
|
|
|
|
2014-05-19 13:19:30 +08:00
|
|
|
return __sort_dimension__add(sd);
|
2009-09-25 00:02:49 +08:00
|
|
|
}
|
2012-12-27 17:11:46 +08:00
|
|
|
|
2014-03-04 08:06:42 +08:00
|
|
|
for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
|
|
|
|
struct hpp_dimension *hd = &hpp_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, hd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
return __hpp_dimension__add(hd);
|
|
|
|
}
|
|
|
|
|
2012-12-27 17:11:46 +08:00
|
|
|
for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &bstack_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
2013-04-01 19:35:20 +08:00
|
|
|
if (sort__mode != SORT_MODE__BRANCH)
|
2012-12-27 17:11:46 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
|
|
|
|
sort__has_sym = 1;
|
|
|
|
|
2014-05-19 13:19:30 +08:00
|
|
|
__sort_dimension__add(sd);
|
2012-12-27 17:11:46 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-04-03 20:26:11 +08:00
|
|
|
for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &memory_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (sort__mode != SORT_MODE__MEMORY)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (sd->entry == &sort_mem_daddr_sym)
|
|
|
|
sort__has_sym = 1;
|
|
|
|
|
2014-05-19 13:19:30 +08:00
|
|
|
__sort_dimension__add(sd);
|
2013-04-03 20:26:11 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-09-25 00:02:49 +08:00
|
|
|
return -ESRCH;
|
|
|
|
}
|
2009-12-15 06:09:29 +08:00
|
|
|
|
2014-03-18 10:31:39 +08:00
|
|
|
static const char *get_default_sort_order(void)
|
|
|
|
{
|
|
|
|
const char *default_sort_orders[] = {
|
|
|
|
default_sort_order,
|
|
|
|
default_branch_sort_order,
|
|
|
|
default_mem_sort_order,
|
|
|
|
default_top_sort_order,
|
|
|
|
default_diff_sort_order,
|
|
|
|
};
|
|
|
|
|
|
|
|
BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders));
|
|
|
|
|
|
|
|
return default_sort_orders[sort__mode];
|
|
|
|
}
|
|
|
|
|
2014-08-23 20:59:48 +08:00
|
|
|
static int setup_sort_order(void)
|
|
|
|
{
|
|
|
|
char *new_sort_order;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Append '+'-prefixed sort order to the default sort
|
|
|
|
* order string.
|
|
|
|
*/
|
|
|
|
if (!sort_order || is_strict_order(sort_order))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (sort_order[1] == '\0') {
|
|
|
|
error("Invalid --sort key: `+'");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We allocate new sort_order string, but we never free it,
|
|
|
|
* because it's checked over the rest of the code.
|
|
|
|
*/
|
|
|
|
if (asprintf(&new_sort_order, "%s,%s",
|
|
|
|
get_default_sort_order(), sort_order + 1) < 0) {
|
|
|
|
error("Not enough memory to set up --sort");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
sort_order = new_sort_order;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-03-04 09:46:34 +08:00
|
|
|
static int __setup_sorting(void)
|
2009-12-15 06:09:29 +08:00
|
|
|
{
|
2014-03-18 10:31:39 +08:00
|
|
|
char *tmp, *tok, *str;
|
2014-08-23 20:59:48 +08:00
|
|
|
const char *sort_keys;
|
2013-02-06 13:57:16 +08:00
|
|
|
int ret = 0;
|
2009-12-15 06:09:29 +08:00
|
|
|
|
2014-08-23 20:59:48 +08:00
|
|
|
ret = setup_sort_order();
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
sort_keys = sort_order;
|
2014-03-04 09:46:34 +08:00
|
|
|
if (sort_keys == NULL) {
|
2014-08-22 21:58:38 +08:00
|
|
|
if (is_strict_order(field_order)) {
|
2014-03-04 09:46:34 +08:00
|
|
|
/*
|
|
|
|
* If user specified field order but no sort order,
|
|
|
|
* we'll honor it and not add default sort orders.
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-03-18 10:31:39 +08:00
|
|
|
sort_keys = get_default_sort_order();
|
2014-03-04 09:46:34 +08:00
|
|
|
}
|
2014-03-18 10:31:39 +08:00
|
|
|
|
|
|
|
str = strdup(sort_keys);
|
2013-02-06 13:57:17 +08:00
|
|
|
if (str == NULL) {
|
|
|
|
error("Not enough memory to setup sort keys");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2009-12-15 06:09:29 +08:00
|
|
|
for (tok = strtok_r(str, ", ", &tmp);
|
|
|
|
tok; tok = strtok_r(NULL, ", ", &tmp)) {
|
2013-02-06 13:57:16 +08:00
|
|
|
ret = sort_dimension__add(tok);
|
2012-12-27 17:11:46 +08:00
|
|
|
if (ret == -EINVAL) {
|
|
|
|
error("Invalid --sort key: `%s'", tok);
|
2013-02-06 13:57:16 +08:00
|
|
|
break;
|
2012-12-27 17:11:46 +08:00
|
|
|
} else if (ret == -ESRCH) {
|
2009-12-15 06:09:29 +08:00
|
|
|
error("Unknown --sort key: `%s'", tok);
|
2013-02-06 13:57:16 +08:00
|
|
|
break;
|
2009-12-15 06:09:29 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
free(str);
|
2013-02-06 13:57:16 +08:00
|
|
|
return ret;
|
2009-12-15 06:09:29 +08:00
|
|
|
}
|
perf diff: Use perf_session__fprintf_hists just like 'perf record'
That means that almost everything you can do with 'perf report'
can be done with 'perf diff', for instance:
$ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699
samples) ] $ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2687
samples) ] perf diff | head -8
9.02% +1.00% find libc-2.10.1.so [.] _IO_vfprintf_internal
2.91% -1.00% find [kernel] [k] __kmalloc
2.85% -1.00% find [kernel] [k] ext4_htree_store_dirent
1.99% -1.00% find [kernel] [k] _atomic_dec_and_lock
2.44% find [kernel] [k] half_md4_transform
$
So if you want to zoom into libc:
$ perf diff --dsos libc-2.10.1.so | head -8
37.34% find [.] _IO_vfprintf_internal
10.34% find [.] __GI_memmove
8.25% +2.00% find [.] _int_malloc
5.07% -1.00% find [.] __GI_mempcpy
7.62% +2.00% find [.] _int_free
$
And if there were multiple commands using libc, it is also
possible to aggregate them all by using --sort symbol:
$ perf diff --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% [.] __GI_mempcpy
7.62% +2.00% [.] _int_free
$
The displacement column now is off by default, to use it:
perf diff -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% +2 [.] __GI_mempcpy
7.62% +2.00% -1 [.] _int_free
$
Using -t/--field-separator can be used for scripting:
$ perf diff -t, -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34, , ,[.] _IO_vfprintf_internal
10.34, , ,[.] __GI_memmove
8.25,+2.00%, ,[.] _int_malloc
5.07,-1.00%, +2,[.] __GI_mempcpy
7.62,+2.00%, -1,[.] _int_free
6.99,+1.00%, -1,[.] _IO_new_file_xsputn
1.89,-2.00%, +4,[.] __readdir64
$
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1260978567-550-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-12-16 23:49:27 +08:00
|
|
|
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticeable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 23:15:47 +08:00
|
|
|
void perf_hpp__set_elide(int idx, bool elide)
|
2014-03-18 12:00:59 +08:00
|
|
|
{
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 23:15:47 +08:00
|
|
|
struct perf_hpp_fmt *fmt;
|
|
|
|
struct hpp_sort_entry *hse;
|
2014-03-18 12:00:59 +08:00
|
|
|
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 23:15:47 +08:00
|
|
|
perf_hpp__for_each_format(fmt) {
|
|
|
|
if (!perf_hpp__is_sort_entry(fmt))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
if (hse->se->se_width_idx == idx) {
|
|
|
|
fmt->elide = elide;
|
|
|
|
break;
|
|
|
|
}
|
2014-03-18 12:00:59 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticeable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 23:15:47 +08:00
|
|
|
static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp)
|
perf diff: Use perf_session__fprintf_hists just like 'perf record'
That means that almost everything you can do with 'perf report'
can be done with 'perf diff', for instance:
$ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699
samples) ] $ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2687
samples) ] perf diff | head -8
9.02% +1.00% find libc-2.10.1.so [.] _IO_vfprintf_internal
2.91% -1.00% find [kernel] [k] __kmalloc
2.85% -1.00% find [kernel] [k] ext4_htree_store_dirent
1.99% -1.00% find [kernel] [k] _atomic_dec_and_lock
2.44% find [kernel] [k] half_md4_transform
$
So if you want to zoom into libc:
$ perf diff --dsos libc-2.10.1.so | head -8
37.34% find [.] _IO_vfprintf_internal
10.34% find [.] __GI_memmove
8.25% +2.00% find [.] _int_malloc
5.07% -1.00% find [.] __GI_mempcpy
7.62% +2.00% find [.] _int_free
$
And if there were multiple commands using libc, it is also
possible to aggregate them all by using --sort symbol:
$ perf diff --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% [.] __GI_mempcpy
7.62% +2.00% [.] _int_free
$
The displacement column now is off by default, to use it:
perf diff -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% +2 [.] __GI_mempcpy
7.62% +2.00% -1 [.] _int_free
$
Using -t/--field-separator can be used for scripting:
$ perf diff -t, -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34, , ,[.] _IO_vfprintf_internal
10.34, , ,[.] __GI_memmove
8.25,+2.00%, ,[.] _int_malloc
5.07,-1.00%, +2,[.] __GI_mempcpy
7.62,+2.00%, -1,[.] _int_free
6.99,+1.00%, -1,[.] _IO_new_file_xsputn
1.89,-2.00%, +4,[.] __readdir64
$
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1260978567-550-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-12-16 23:49:27 +08:00
|
|
|
{
|
|
|
|
if (list && strlist__nr_entries(list) == 1) {
|
|
|
|
if (fp != NULL)
|
|
|
|
fprintf(fp, "# %s: %s\n", list_name,
|
|
|
|
strlist__entry(list, 0)->s);
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 23:15:47 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool get_elide(int idx, FILE *output)
|
|
|
|
{
|
|
|
|
switch (idx) {
|
|
|
|
case HISTC_SYMBOL:
|
|
|
|
return __get_elide(symbol_conf.sym_list, "symbol", output);
|
|
|
|
case HISTC_DSO:
|
|
|
|
return __get_elide(symbol_conf.dso_list, "dso", output);
|
|
|
|
case HISTC_COMM:
|
|
|
|
return __get_elide(symbol_conf.comm_list, "comm", output);
|
|
|
|
default:
|
|
|
|
break;
|
perf diff: Use perf_session__fprintf_hists just like 'perf record'
That means that almost everything you can do with 'perf report'
can be done with 'perf diff', for instance:
$ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699
samples) ] $ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2687
samples) ] perf diff | head -8
9.02% +1.00% find libc-2.10.1.so [.] _IO_vfprintf_internal
2.91% -1.00% find [kernel] [k] __kmalloc
2.85% -1.00% find [kernel] [k] ext4_htree_store_dirent
1.99% -1.00% find [kernel] [k] _atomic_dec_and_lock
2.44% find [kernel] [k] half_md4_transform
$
So if you want to zoom into libc:
$ perf diff --dsos libc-2.10.1.so | head -8
37.34% find [.] _IO_vfprintf_internal
10.34% find [.] __GI_memmove
8.25% +2.00% find [.] _int_malloc
5.07% -1.00% find [.] __GI_mempcpy
7.62% +2.00% find [.] _int_free
$
And if there were multiple commands using libc, it is also
possible to aggregate them all by using --sort symbol:
$ perf diff --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% [.] __GI_mempcpy
7.62% +2.00% [.] _int_free
$
The displacement column now is off by default, to use it:
perf diff -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% +2 [.] __GI_mempcpy
7.62% +2.00% -1 [.] _int_free
$
Using -t/--field-separator can be used for scripting:
$ perf diff -t, -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34, , ,[.] _IO_vfprintf_internal
10.34, , ,[.] __GI_memmove
8.25,+2.00%, ,[.] _int_malloc
5.07,-1.00%, +2,[.] __GI_mempcpy
7.62,+2.00%, -1,[.] _int_free
6.99,+1.00%, -1,[.] _IO_new_file_xsputn
1.89,-2.00%, +4,[.] __readdir64
$
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1260978567-550-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-12-16 23:49:27 +08:00
|
|
|
}
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 23:15:47 +08:00
|
|
|
|
|
|
|
if (sort__mode != SORT_MODE__BRANCH)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
switch (idx) {
|
|
|
|
case HISTC_SYMBOL_FROM:
|
|
|
|
return __get_elide(symbol_conf.sym_from_list, "sym_from", output);
|
|
|
|
case HISTC_SYMBOL_TO:
|
|
|
|
return __get_elide(symbol_conf.sym_to_list, "sym_to", output);
|
|
|
|
case HISTC_DSO_FROM:
|
|
|
|
return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
|
|
|
|
case HISTC_DSO_TO:
|
|
|
|
return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
perf diff: Use perf_session__fprintf_hists just like 'perf record'
That means that almost everything you can do with 'perf report'
can be done with 'perf diff', for instance:
$ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2699
samples) ] $ perf record -f find / > /dev/null
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.062 MB perf.data (~2687
samples) ] perf diff | head -8
9.02% +1.00% find libc-2.10.1.so [.] _IO_vfprintf_internal
2.91% -1.00% find [kernel] [k] __kmalloc
2.85% -1.00% find [kernel] [k] ext4_htree_store_dirent
1.99% -1.00% find [kernel] [k] _atomic_dec_and_lock
2.44% find [kernel] [k] half_md4_transform
$
So if you want to zoom into libc:
$ perf diff --dsos libc-2.10.1.so | head -8
37.34% find [.] _IO_vfprintf_internal
10.34% find [.] __GI_memmove
8.25% +2.00% find [.] _int_malloc
5.07% -1.00% find [.] __GI_mempcpy
7.62% +2.00% find [.] _int_free
$
And if there were multiple commands using libc, it is also
possible to aggregate them all by using --sort symbol:
$ perf diff --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% [.] __GI_mempcpy
7.62% +2.00% [.] _int_free
$
The displacement column now is off by default, to use it:
perf diff -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34% [.] _IO_vfprintf_internal
10.34% [.] __GI_memmove
8.25% +2.00% [.] _int_malloc
5.07% -1.00% +2 [.] __GI_mempcpy
7.62% +2.00% -1 [.] _int_free
$
Using -t/--field-separator can be used for scripting:
$ perf diff -t, -m --dsos libc-2.10.1.so --sort symbol | head -8
37.34, , ,[.] _IO_vfprintf_internal
10.34, , ,[.] __GI_memmove
8.25,+2.00%, ,[.] _int_malloc
5.07,-1.00%, +2,[.] __GI_mempcpy
7.62,+2.00%, -1,[.] _int_free
6.99,+1.00%, -1,[.] _IO_new_file_xsputn
1.89,-2.00%, +4,[.] __readdir64
$
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1260978567-550-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-12-16 23:49:27 +08:00
|
|
|
}
|
2013-04-03 20:26:19 +08:00
|
|
|
|
|
|
|
void sort__setup_elide(FILE *output)
|
|
|
|
{
|
2014-05-19 13:19:30 +08:00
|
|
|
struct perf_hpp_fmt *fmt;
|
|
|
|
struct hpp_sort_entry *hse;
|
2013-11-08 16:53:42 +08:00
|
|
|
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 23:15:47 +08:00
|
|
|
perf_hpp__for_each_format(fmt) {
|
|
|
|
if (!perf_hpp__is_sort_entry(fmt))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
hse = container_of(fmt, struct hpp_sort_entry, hpp);
|
|
|
|
fmt->elide = get_elide(hse->se->se_width_idx, output);
|
2013-04-03 20:26:19 +08:00
|
|
|
}
|
|
|
|
|
2013-11-08 16:53:42 +08:00
|
|
|
/*
|
|
|
|
* It makes no sense to elide all of sort entries.
|
|
|
|
* Just revert them to show up again.
|
|
|
|
*/
|
2014-05-19 13:19:30 +08:00
|
|
|
perf_hpp__for_each_format(fmt) {
|
|
|
|
if (!perf_hpp__is_sort_entry(fmt))
|
|
|
|
continue;
|
|
|
|
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 23:15:47 +08:00
|
|
|
if (!fmt->elide)
|
2013-11-08 16:53:42 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-05-19 13:19:30 +08:00
|
|
|
perf_hpp__for_each_format(fmt) {
|
|
|
|
if (!perf_hpp__is_sort_entry(fmt))
|
|
|
|
continue;
|
|
|
|
|
perf tools: Move elide bool into perf_hpp_fmt struct
After output/sort fields refactoring, it's expensive
to check the elide bool in its current location inside
the 'struct sort_entry'.
The perf_hpp__should_skip function gets highly noticable in
workloads with high number of output/sort fields, like for:
$ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio
Performance report:
9.70% perf [.] perf_hpp__should_skip
Moving the elide bool into the 'struct perf_hpp_fmt', which
makes the perf_hpp__should_skip just single struct read.
Got speedup of around 22% for my test perf.data workload.
The change should not harm any other workload types.
Performance counter stats for (10 runs):
before:
358,319,732,626 cycles ( +- 0.55% )
467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% )
150.943975206 seconds time elapsed ( +- 0.62% )
now:
278,785,972,990 cycles ( +- 0.12% )
370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% )
116.416670507 seconds time elapsed ( +- 0.31% )
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
2014-05-23 23:15:47 +08:00
|
|
|
fmt->elide = false;
|
2014-05-19 13:19:30 +08:00
|
|
|
}
|
2013-04-03 20:26:19 +08:00
|
|
|
}
|
2014-03-04 09:46:34 +08:00
|
|
|
|
|
|
|
static int output_field_add(char *tok)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &common_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
return __sort_dimension__add_output(sd);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
|
|
|
|
struct hpp_dimension *hd = &hpp_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, hd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
return __hpp_dimension__add_output(hd);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &bstack_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
return __sort_dimension__add_output(sd);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
|
|
|
|
struct sort_dimension *sd = &memory_sort_dimensions[i];
|
|
|
|
|
|
|
|
if (strncasecmp(tok, sd->name, strlen(tok)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
return __sort_dimension__add_output(sd);
|
|
|
|
}
|
|
|
|
|
|
|
|
return -ESRCH;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void reset_dimensions(void)
|
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++)
|
|
|
|
common_sort_dimensions[i].taken = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++)
|
|
|
|
hpp_sort_dimensions[i].taken = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++)
|
|
|
|
bstack_sort_dimensions[i].taken = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++)
|
|
|
|
memory_sort_dimensions[i].taken = 0;
|
|
|
|
}
|
|
|
|
|
2014-08-22 21:58:38 +08:00
|
|
|
/*
 * Report whether @order is a "strict" order string.
 *
 * Returns false when @order is NULL or when its first character is '+';
 * returns true for every other string, including the empty string.
 */
bool is_strict_order(const char *order)
{
	if (!order)
		return false;

	return order[0] != '+';
}
|
|
|
|
|
2014-03-04 09:46:34 +08:00
|
|
|
static int __setup_output_field(void)
|
|
|
|
{
|
2014-08-22 21:58:38 +08:00
|
|
|
char *tmp, *tok, *str, *strp;
|
|
|
|
int ret = -EINVAL;
|
2014-03-04 09:46:34 +08:00
|
|
|
|
|
|
|
if (field_order == NULL)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
reset_dimensions();
|
|
|
|
|
2014-08-22 21:58:38 +08:00
|
|
|
strp = str = strdup(field_order);
|
2014-03-04 09:46:34 +08:00
|
|
|
if (str == NULL) {
|
|
|
|
error("Not enough memory to setup output fields");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2014-08-22 21:58:38 +08:00
|
|
|
if (!is_strict_order(field_order))
|
|
|
|
strp++;
|
|
|
|
|
|
|
|
if (!strlen(strp)) {
|
|
|
|
error("Invalid --fields key: `+'");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (tok = strtok_r(strp, ", ", &tmp);
|
2014-03-04 09:46:34 +08:00
|
|
|
tok; tok = strtok_r(NULL, ", ", &tmp)) {
|
|
|
|
ret = output_field_add(tok);
|
|
|
|
if (ret == -EINVAL) {
|
|
|
|
error("Invalid --fields key: `%s'", tok);
|
|
|
|
break;
|
|
|
|
} else if (ret == -ESRCH) {
|
|
|
|
error("Unknown --fields key: `%s'", tok);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-22 21:58:38 +08:00
|
|
|
out:
|
2014-03-04 09:46:34 +08:00
|
|
|
free(str);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int setup_sorting(void)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = __setup_sorting();
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
if (parent_pattern != default_parent_pattern) {
|
|
|
|
err = sort_dimension__add("parent");
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
reset_dimensions();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* perf diff doesn't use default hpp output fields.
|
|
|
|
*/
|
|
|
|
if (sort__mode != SORT_MODE__DIFF)
|
|
|
|
perf_hpp__init();
|
|
|
|
|
|
|
|
err = __setup_output_field();
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* copy sort keys to output fields */
|
|
|
|
perf_hpp__setup_output_field();
|
|
|
|
/* and then copy output fields to sort keys */
|
|
|
|
perf_hpp__append_sort_keys();
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2014-05-07 17:42:24 +08:00
|
|
|
|
|
|
|
void reset_output_field(void)
|
|
|
|
{
|
|
|
|
sort__need_collapse = 0;
|
|
|
|
sort__has_parent = 0;
|
|
|
|
sort__has_sym = 0;
|
|
|
|
sort__has_dso = 0;
|
|
|
|
|
2014-05-23 09:59:01 +08:00
|
|
|
field_order = NULL;
|
|
|
|
sort_order = NULL;
|
|
|
|
|
2014-05-07 17:42:24 +08:00
|
|
|
reset_dimensions();
|
|
|
|
perf_hpp__reset_output_field();
|
|
|
|
}
|