linux/tools/perf/util/hist.h

143 lines
4.4 KiB
C
Raw Normal View History

#ifndef __PERF_HIST_H
#define __PERF_HIST_H
#include <linux/types.h>
#include <pthread.h>
#include "callchain.h"
extern struct callchain_param callchain_param;
struct hist_entry;
struct addr_location;
struct symbol;
/*
* The kernel collects the number of events it couldn't send in a stretch and
* when possible sends this number in a PERF_RECORD_LOST event. The number of
* such "chunks" of lost events is stored in .nr_events[PERF_EVENT_LOST] while
* total_lost tells exactly how many events the kernel in fact lost, i.e. it is
* the sum of all struct lost_event.lost fields reported.
*
* The total_period is needed because by default auto-freq is used, so
* multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
* the total number of low level events, it is necessary to to sum all struct
* sample_event.period and stash the result in total_period.
*/
2010-05-11 00:04:11 +08:00
struct events_stats {
u64 total_period;
u64 total_lost;
u64 total_invalid_chains;
u32 nr_events[PERF_RECORD_HEADER_MAX];
u32 nr_unknown_events;
u32 nr_invalid_chains;
u32 nr_unknown_id;
2010-05-11 00:04:11 +08:00
};
enum hist_column {
HISTC_SYMBOL,
HISTC_DSO,
HISTC_THREAD,
HISTC_COMM,
HISTC_PARENT,
HISTC_CPU,
HISTC_NR_COLS, /* Last entry */
};
struct thread;
struct dso;
2010-05-11 00:04:11 +08:00
struct hists {
struct rb_root entries_in_array[2];
struct rb_root *entries_in;
2010-05-11 00:04:11 +08:00
struct rb_root entries;
struct rb_root entries_collapsed;
u64 nr_entries;
const struct thread *thread_filter;
const struct dso *dso_filter;
pthread_mutex_t lock;
2010-05-11 00:04:11 +08:00
struct events_stats stats;
u64 event_stream;
u16 col_len[HISTC_NR_COLS];
perf callchain: Feed callchains into a cursor The callchains are fed with an array of a fixed size. As a result we iterate over each callchains three times: - 1st to resolve symbols - 2nd to filter out context boundaries - 3rd for the insertion into the tree This also involves some pairs of memory allocation/deallocation everytime we insert a callchain, for the filtered out array of addresses and for the array of symbols that comes along. Instead, feed the callchains through a linked list with persistent allocations. It brings several pros like: - Merge the 1st and 2nd iterations in one. That was possible before but in a way that would involve allocating an array slightly taller than necessary because we don't know in advance the number of context boundaries to filter out. - Much lesser allocations/deallocations. The linked list keeps persistent empty entries for the next usages and is extendable at will. - Makes it easier for multiple sources of callchains to feed a stacktrace together. This is deemed to pave the way for cfi based callchains wherein traditional frame pointer based kernel stacktraces will precede cfi based user ones, producing an overall callchain which size is hardly predictable. This requirement makes the static array obsolete and makes a linked list based iterator a much more flexible fit. Basic testing on a big perf file containing callchains (~ 176 MB) has shown a throughput gain of about 11% with perf report. Cc: Ingo Molnar <mingo@elte.hu> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1294977121-5700-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2011-01-14 11:51:58 +08:00
/* Best would be to reuse the session callchain cursor */
struct callchain_cursor callchain_cursor;
2010-05-11 00:04:11 +08:00
};
void hists__init(struct hists *hists);
2010-05-11 00:04:11 +08:00
struct hist_entry *__hists__add_entry(struct hists *self,
struct addr_location *al,
struct symbol *parent, u64 period);
extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
struct hists *pair_hists, bool show_displacement,
long displacement, FILE *fp, u64 session_total);
2010-05-11 00:04:11 +08:00
int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
struct hists *hists);
void hist_entry__free(struct hist_entry *);
void hists__output_resort(struct hists *self);
void hists__output_resort_threaded(struct hists *hists);
2010-05-11 00:04:11 +08:00
void hists__collapse_resort(struct hists *self);
void hists__collapse_resort_threaded(struct hists *hists);
void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
void hists__decay_entries_threaded(struct hists *hists, bool zap_user,
bool zap_kernel);
perf top: Reuse the 'report' hist_entry/hists classes This actually fixes several problems we had in the old 'perf top': 1. Unresolved symbols not show, limitation that came from the old "KernelTop" codebase, to solve it we would need to do changes that would make sym_entry have most of the hist_entry fields. 2. It was using the number of samples, not the sum of sample->period. And brings the --sort code that allows us to have all the views in 'perf report', for instance: [root@emilia ~]# perf top --sort dso PerfTop: 5903 irqs/sec kernel:77.5% exact: 0.0% [1000Hz cycles], (all, 8 CPUs) ------------------------------------------------------------------------------ 31.59% libcrypto.so.1.0.0 21.55% [kernel] 18.57% libpython2.6.so.1.0 7.04% libc-2.12.so 6.99% _backend_agg.so 4.72% sshd 1.48% multiarray.so 1.39% libfreetype.so.6.3.22 1.37% perf 0.71% libgobject-2.0.so.0.2200.5 0.53% [tg3] 0.48% libglib-2.0.so.0.2200.5 0.44% libstdc++.so.6.0.13 0.40% libcairo.so.2.10800.8 0.38% libm-2.12.so 0.34% umath.so 0.30% libgdk-x11-2.0.so.0.1800.9 0.22% libpthread-2.12.so 0.20% libgtk-x11-2.0.so.0.1800.9 0.20% librt-2.12.so 0.15% _path.so 0.13% libpango-1.0.so.0.2800.1 0.11% libatlas.so.3.0 0.09% ft2font.so 0.09% libpangoft2-1.0.so.0.2800.1 0.08% libX11.so.6.3.0 0.07% [vdso] 0.06% cyclictest ^C All the filter lists can be used as well: --dsos, --comms, --symbols, etc. The 'perf report' TUI is also reused, being possible to apply all the zoom operations, do annotation, etc. This change will allow multiple simplifications in the symbol system as well, that will be detailed in upcoming changesets. Cc: David Ahern <dsahern@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/n/tip-xzaaldxq7zhqrrxdxjifk1mh@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2011-10-06 06:16:15 +08:00
void hists__output_recalc_col_len(struct hists *hists, int max_rows);
void hists__inc_nr_events(struct hists *self, u32 type);
size_t hists__fprintf_nr_events(struct hists *self, FILE *fp);
2010-05-11 00:04:11 +08:00
size_t hists__fprintf(struct hists *self, struct hists *pair,
bool show_displacement, bool show_header,
int max_rows, int max_cols, FILE *fp);
int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr);
int hist_entry__annotate(struct hist_entry *self, size_t privsize);
void hists__filter_by_dso(struct hists *hists);
void hists__filter_by_thread(struct hists *hists);
u16 hists__col_len(struct hists *self, enum hist_column col);
void hists__set_col_len(struct hists *self, enum hist_column col, u16 len);
bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len);
struct perf_evlist;
#ifdef NO_NEWT_SUPPORT
static inline
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used,
const char *help __used,
void(*timer)(void *arg) __used,
void *arg __used,
int refresh __used)
{
return 0;
}
static inline int hist_entry__tui_annotate(struct hist_entry *self __used,
int evidx __used,
int nr_events __used,
void(*timer)(void *arg) __used,
void *arg __used,
int delay_secs __used)
{
return 0;
}
#define K_LEFT -1
#define K_RIGHT -2
#else
#include "ui/keysyms.h"
int hist_entry__tui_annotate(struct hist_entry *he, int evidx, int nr_events,
void(*timer)(void *arg), void *arg, int delay_secs);
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
void(*timer)(void *arg), void *arg,
int refresh);
#endif
unsigned int hists__sort_list_width(struct hists *self);
#endif /* __PERF_HIST_H */