perf report: Cache srclines for callchain nodes
On one hand this ensures that the memory is properly freed when the DSO gets freed. On the other hand this significantly speeds up the processing of the callchain nodes when lots of srclines are requested. For one of my data files e.g.:

Before:

 Performance counter stats for 'perf report -s srcline -g srcline --stdio':

      52496.495043      task-clock (msec)         #    0.999 CPUs utilized
               634      context-switches          #    0.012 K/sec
                 2      cpu-migrations            #    0.000 K/sec
           191,561      page-faults               #    0.004 M/sec
   165,074,498,235      cycles                    #    3.144 GHz
   334,170,832,408      instructions              #    2.02  insn per cycle
    90,220,029,745      branches                  # 1718.591 M/sec
       654,525,177      branch-misses             #    0.73% of all branches

      52.533273822 seconds time elapsed

Processed 236605 events and lost 40 chunks!

After:

 Performance counter stats for 'perf report -s srcline -g srcline --stdio':

      22606.323706      task-clock (msec)         #    1.000 CPUs utilized
                31      context-switches          #    0.001 K/sec
                 0      cpu-migrations            #    0.000 K/sec
           185,471      page-faults               #    0.008 M/sec
    71,188,113,681      cycles                    #    3.149 GHz
   133,204,943,083      instructions              #    1.87  insn per cycle
    34,886,384,979      branches                  # 1543.214 M/sec
       278,214,495      branch-misses             #    0.80% of all branches

      22.609857253 seconds time elapsed

Note that the difference is only this large when `--inline` is not passed. When it is passed, we would use the inliner cache and thus do not run this code path that often.

I think that this cache should actually be used in other places, too. When looking at the valgrind leak report for perf report, we see tons of srclines being leaked, most notably from calls to hist_entry__get_srcline. The problem is that get_srcline has many different formatting options (show_sym, show_addr, potentially even unwind_inlines when calling __get_srcline directly). As such, the srcline cannot easily be cached for all calls, or we'd have to add caches for all formatting combinations (6 so far).

An alternative would be to remove the formatting options and handle that on a different level - i.e. print the sym/addr on demand wherever we actually output something. And the unwind_inlines could be moved into a separate function that does not return the srcline.

Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171019113836.5548-4-milian.wolff@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
b38775cf76
commit
21ac9d547f
|
@ -1203,6 +1203,7 @@ struct dso *dso__new(const char *name)
|
|||
dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
|
||||
dso->data.cache = RB_ROOT;
|
||||
dso->inlined_nodes = RB_ROOT;
|
||||
dso->srclines = RB_ROOT;
|
||||
dso->data.fd = -1;
|
||||
dso->data.status = DSO_DATA_STATUS_UNKNOWN;
|
||||
dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
|
||||
|
@ -1237,6 +1238,7 @@ void dso__delete(struct dso *dso)
|
|||
|
||||
/* free inlines first, as they reference symbols */
|
||||
inlines__tree_delete(&dso->inlined_nodes);
|
||||
srcline__tree_delete(&dso->srclines);
|
||||
for (i = 0; i < MAP__NR_TYPES; ++i)
|
||||
symbols__delete(&dso->symbols[i]);
|
||||
|
||||
|
|
|
@ -142,6 +142,7 @@ struct dso {
|
|||
struct rb_root symbols[MAP__NR_TYPES];
|
||||
struct rb_root symbol_names[MAP__NR_TYPES];
|
||||
struct rb_root inlined_nodes;
|
||||
struct rb_root srclines;
|
||||
struct {
|
||||
u64 addr;
|
||||
struct symbol *symbol;
|
||||
|
|
|
@ -1711,11 +1711,22 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
|
|||
|
||||
/*
 * Resolve the source-line string for a callchain entry, going through the
 * per-DSO srcline cache.
 *
 * @map: map the sampled address belongs to; may be NULL.
 * @sym: symbol handed through to get_srcline().
 * @ip:  address of the callchain entry; also used as the cache key.
 *
 * Returns NULL when there is no map or when callchains are keyed by function
 * (CCKEY_FUNCTION). Otherwise returns the string already stored for @ip in
 * map->dso->srclines, or, on a miss, the result of get_srcline(), which is
 * then handed to srcline__tree_insert() so later lookups for the same @ip
 * hit the cache.
 *
 * The returned pointer is also stored in the DSO's srcline tree, which is
 * released via srcline__tree_delete(); callers should therefore not free it
 * themselves.
 */
static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip)
{
	char *srcline = NULL;

	if (!map || callchain_param.key == CCKEY_FUNCTION)
		return srcline;

	srcline = srcline__tree_find(&map->dso->srclines, ip);
	if (!srcline) {
		/* the symbol name is never part of a callchain srcline */
		bool show_sym = false;
		/* the address is only shown when chains are keyed by address */
		bool show_addr = callchain_param.key == CCKEY_ADDRESS;

		srcline = get_srcline(map->dso, map__rip_2objdump(map, ip),
				      sym, show_sym, show_addr);
		srcline__tree_insert(&map->dso->srclines, ip, srcline);
	}

	return srcline;
}
|
||||
|
||||
struct iterations {
|
||||
|
|
|
@ -542,6 +542,72 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
|
|||
return __get_srcline(dso, addr, sym, show_sym, show_addr, false);
|
||||
}
|
||||
|
||||
struct srcline_node {
|
||||
u64 addr;
|
||||
char *srcline;
|
||||
struct rb_node rb_node;
|
||||
};
|
||||
|
||||
/*
 * Store @srcline under key @addr in the srcline cache @tree.
 *
 * A new node is allocated and linked in; the string pointer itself is stored,
 * not copied, and is later released by srcline__tree_delete(). No check for
 * an existing entry is made: a key that is not smaller than an existing one
 * is placed in the right subtree.
 *
 * If the node cannot be allocated, an error is reported with perror() and
 * the tree is left unchanged; @srcline is not freed in that case.
 */
void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline)
{
	struct rb_node **link = &tree->rb_node;
	struct rb_node *parent = NULL;
	struct srcline_node *entry;

	entry = zalloc(sizeof(*entry));
	if (entry == NULL) {
		perror("not enough memory for the srcline node");
		return;
	}

	entry->addr = addr;
	entry->srcline = srcline;

	/* walk down to the empty slot where the new node belongs */
	while (*link != NULL) {
		struct srcline_node *cur;

		parent = *link;
		cur = rb_entry(parent, struct srcline_node, rb_node);
		link = (addr < cur->addr) ? &parent->rb_left : &parent->rb_right;
	}

	rb_link_node(&entry->rb_node, parent, link);
	rb_insert_color(&entry->rb_node, tree);
}
|
||||
|
||||
/*
 * Look up the srcline stored for @addr in the srcline cache @tree.
 *
 * Returns the stored string pointer of the first node whose key equals @addr
 * on the search path, or NULL when no such node exists. The returned pointer
 * is the one held by the tree, not a copy.
 */
char *srcline__tree_find(struct rb_root *tree, u64 addr)
{
	struct rb_node *cur;

	for (cur = tree->rb_node; cur != NULL; ) {
		struct srcline_node *entry;

		entry = rb_entry(cur, struct srcline_node, rb_node);
		if (addr == entry->addr)
			return entry->srcline;

		/* smaller keys live to the left, larger ones to the right */
		cur = (addr < entry->addr) ? cur->rb_left : cur->rb_right;
	}

	return NULL;
}
|
||||
|
||||
void srcline__tree_delete(struct rb_root *tree)
|
||||
{
|
||||
struct srcline_node *pos;
|
||||
struct rb_node *next = rb_first(tree);
|
||||
|
||||
while (next) {
|
||||
pos = rb_entry(next, struct srcline_node, rb_node);
|
||||
next = rb_next(&pos->rb_node);
|
||||
rb_erase(&pos->rb_node, tree);
|
||||
free_srcline(pos->srcline);
|
||||
zfree(&pos);
|
||||
}
|
||||
}
|
||||
|
||||
struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
|
||||
struct symbol *sym)
|
||||
{
|
||||
|
|
|
@ -15,6 +15,13 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
|
|||
bool show_sym, bool show_addr, bool unwind_inlines);
|
||||
void free_srcline(char *srcline);
|
||||
|
||||
/* insert the srcline into the DSO, which will take ownership */
|
||||
void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline);
|
||||
/* find previously inserted srcline */
|
||||
char *srcline__tree_find(struct rb_root *tree, u64 addr);
|
||||
/* delete all srclines within the tree */
|
||||
void srcline__tree_delete(struct rb_root *tree);
|
||||
|
||||
#define SRCLINE_UNKNOWN ((char *) "??:0")
|
||||
|
||||
struct inline_list {
|
||||
|
|
Loading…
Reference in New Issue