perf/core improvements and fixes.

. Fix memcpy benchmark for large sizes, from Andi Kleen.
 
 . Support callchain sorting based on addresses, from Andi Kleen
 
 . Move weight back to common sort keys, From Andi Kleen.
 
 . Fix named threads support in 'perf script', from David Ahern.
 
 . Handle ENODEV on default cycles event, fix from David Ahern.
 
 . More install tests, from Jiri Olsa.
 
 . Fix build with perl 5.18, from Kirill A. Shutemov.
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.13 (GNU/Linux)
 
 iQIcBAABAgAGBQJR7ZQjAAoJENZQFvNTUqpA/A8P/1KrHaw0ZaPqVWwfFvvVva3P
 ECnT0olyxBLjkE36ZjCVo/h26K2HxDJczQs5mixfPS2TEGYwciz2LyGB3Yl3PSMq
 +PiqIDHixq+r4lxf/iRy8rLYz8AD3fmWkr261YV9DVfjwTEivw69ewpYO/cSh1w6
 NmUnSlcJuG10O//4hBaf/QQMZ/tLwqnug41YWygnoWi0PNx4pt0x68VHtWVH4+1v
 EyGuPB9if+KtNO7clQeYJe9+qLtbUCop8nB9A5nAvdqjZhtPIANOc0eAR137nMnE
 CS8FVWcr9T1SL/kMDfj6Psp1p4QorgqfVnL7jePgq+jFE4vJo0E7bzTyA48h9tA2
 FabTxYIZ4ESmfcXM+X3I/BWyyeIh2NQ6pFk1w5tMUbUjwhAM0Xt0yotDLYxVjFfn
 npckStVtOK9t76XpHwi8331f6GiY61fpQRnkxBLx5c76A4+ESgf+BtPAezjGq5YQ
 MSBP+x04sOEcSuowdZskxWort42TfDAAVJffoPB0GPKp/YWgWFbD+VfImZ6hw44i
 TpUoBCte06V74E+/HE/A0rc6Z8NgKzGLI/6f3EOwz4tl6DuvQlzt8aos0IAHfJSH
 z9mrjt9b+cMboXUw7uiXxnZwFZdxdhbRH56TT8DHqUhaZdyaG88GmWzZ8xDxHCoZ
 auqWRlmWR8VK78QnrpNO
 =nilC
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

  * Fix memcpy benchmark for large sizes, from Andi Kleen.

  * Support callchain sorting based on addresses, from Andi Kleen.

  * Move weight back to common sort keys, from Andi Kleen.

  * Fix named threads support in 'perf script', from David Ahern.

  * Handle ENODEV on default cycles event, fix from David Ahern.

  * More install tests, from Jiri Olsa.

  * Fix build with perl 5.18, from Kirill A. Shutemov.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2013-07-23 09:37:33 +02:00
commit 4f16d61f80
16 changed files with 124 additions and 40 deletions

View File

@ -115,7 +115,7 @@ OPTIONS
--dump-raw-trace::
Dump raw trace in ASCII.
-g [type,min[,limit],order]::
-g [type,min[,limit],order[,key]]::
--call-graph::
Display call chains using type, min percent threshold, optional print
limit and order.
@ -129,7 +129,11 @@ OPTIONS
- callee: callee based call graph.
- caller: inverted caller based call graph.
Default: fractal,0.5,callee.
key can be:
- function: compare on functions
- address: compare on individual code addresses
Default: fractal,0.5,callee,function.
-G::
--inverted::

View File

@ -631,10 +631,10 @@ $(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $<
$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $<
$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<

View File

@ -117,6 +117,8 @@ static void alloc_mem(void **dst, void **src, size_t length)
*src = zalloc(length);
if (!*src)
die("memory allocation failed - maybe length is too large?\n");
/* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
memset(*src, 0, length);
}
static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)

View File

@ -667,12 +667,23 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
}
/* get the call chain order */
if (!strcmp(tok2, "caller"))
if (!strncmp(tok2, "caller", strlen("caller")))
callchain_param.order = ORDER_CALLER;
else if (!strcmp(tok2, "callee"))
else if (!strncmp(tok2, "callee", strlen("callee")))
callchain_param.order = ORDER_CALLEE;
else
return -1;
/* Get the sort key */
tok2 = strtok(NULL, ",");
if (!tok2)
goto setup;
if (!strncmp(tok2, "function", strlen("function")))
callchain_param.key = CCKEY_FUNCTION;
else if (!strncmp(tok2, "address", strlen("address")))
callchain_param.key = CCKEY_ADDRESS;
else
return -1;
setup:
if (callchain_register_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain params\n");
@ -784,8 +795,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
"Only display entries with parent-match"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit and callchain order. "
"Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
"Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
"alias for inverted call graph"),
OPT_CALLBACK(0, "ignore-callees", NULL, "regex",

View File

@ -397,10 +397,10 @@ static void print_sample_bts(union perf_event *event,
static void process_event(union perf_event *event, struct perf_sample *sample,
struct perf_evsel *evsel, struct machine *machine,
struct addr_location *al)
struct thread *thread,
struct addr_location *al __maybe_unused)
{
struct perf_event_attr *attr = &evsel->attr;
struct thread *thread = al->thread;
if (output[attr->type].fields == 0)
return;
@ -511,7 +511,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
return 0;
scripting_ops->process_event(event, sample, evsel, machine, &al);
scripting_ops->process_event(event, sample, evsel, machine, thread, &al);
evsel->hists.stats.total_period += sample->period;
return 0;

View File

@ -1,6 +1,8 @@
PERF := .
MK := Makefile
has = $(shell which $1 2>/dev/null)
# standard single make variable specified
make_clean_all := clean all
make_python_perf_so := python/perf.so
@ -25,6 +27,13 @@ make_help := help
make_doc := doc
make_perf_o := perf.o
make_util_map_o := util/map.o
make_install := install
make_install_bin := install-bin
make_install_doc := install-doc
make_install_man := install-man
make_install_html := install-html
make_install_info := install-info
make_install_pdf := install-pdf
# all the NO_* variable combined
make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
@ -50,14 +59,27 @@ run += make_no_backtrace
run += make_no_libnuma
run += make_no_libaudit
run += make_no_libbionic
run += make_tags
run += make_cscope
run += make_help
run += make_doc
run += make_perf_o
run += make_util_map_o
run += make_install
run += make_install_bin
# FIXME 'install-*' commented out till they're fixed
# run += make_install_doc
# run += make_install_man
# run += make_install_html
# run += make_install_info
# run += make_install_pdf
run += make_minimal
ifneq ($(call has,ctags),)
run += make_tags
endif
ifneq ($(call has,cscope),)
run += make_cscope
endif
# $(run_O) contains same portion of $(run) tests with '_O' attached
# to distinguish O=... tests
run_O := $(addsuffix _O,$(run))
@ -84,6 +106,31 @@ test_make_python_perf_so := test -f $(PERF)/python/perf.so
test_make_perf_o := test -f $(PERF)/perf.o
test_make_util_map_o := test -f $(PERF)/util/map.o
test_make_install := test -x $$TMP_DEST/bin/perf
test_make_install_O := $(test_make_install)
test_make_install_bin := $(test_make_install)
test_make_install_bin_O := $(test_make_install)
# FIXME nothing gets installed
test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1
test_make_install_man_O := $(test_make_install_man)
# FIXME nothing gets installed
test_make_install_doc := $(test_ok)
test_make_install_doc_O := $(test_ok)
# FIXME nothing gets installed
test_make_install_html := $(test_ok)
test_make_install_html_O := $(test_ok)
# FIXME nothing gets installed
test_make_install_info := $(test_ok)
test_make_install_info_O := $(test_ok)
# FIXME nothing gets installed
test_make_install_pdf := $(test_ok)
test_make_install_pdf_O := $(test_ok)
# Kbuild tests only
#test_make_python_perf_so_O := test -f $$TMP/tools/perf/python/perf.so
#test_make_perf_o_O := test -f $$TMP/tools/perf/perf.o
@ -95,7 +142,7 @@ test_make_util_map_o_O := true
test_default = test -x $(PERF)/perf
test = $(if $(test_$1),$(test_$1),$(test_default))
test_default_O = test -x $$TMP/perf
test_default_O = test -x $$TMP_O/perf
test_O = $(if $(test_$1),$(test_$1),$(test_default_O))
all:
@ -111,23 +158,27 @@ clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)
$(run):
$(call clean)
@cmd="cd $(PERF) && make -f $(MK) $($@)"; \
@TMP_DEST=$$(mktemp -d); \
cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \
echo "- $@: $$cmd" && echo $$cmd > $@ && \
( eval $$cmd ) >> $@ 2>&1; \
echo " test: $(call test,$@)"; \
$(call test,$@) && \
rm -f $@
rm -f $@ \
rm -rf $$TMP_DEST
$(run_O):
$(call clean)
@TMP=$$(mktemp -d); \
cmd="cd $(PERF) && make -f $(MK) $($(patsubst %_O,%,$@)) O=$$TMP"; \
@TMP_O=$$(mktemp -d); \
TMP_DEST=$$(mktemp -d); \
cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
echo "- $@: $$cmd" && echo $$cmd > $@ && \
( eval $$cmd ) >> $@ 2>&1 && \
echo " test: $(call test_O,$@)"; \
$(call test_O,$@) && \
rm -f $@ && \
rm -rf $$TMP
rm -rf $$TMP_O \
rm -rf $$TMP_DEST
all: $(run) $(run_O)
@echo OK

View File

@ -15,6 +15,7 @@
#include <errno.h>
#include <math.h>
#include "hist.h"
#include "util.h"
#include "callchain.h"
@ -327,7 +328,8 @@ append_chain(struct callchain_node *root,
/*
* Lookup in the current node
* If we have a symbol, then compare the start to match
* anywhere inside a function.
* anywhere inside a function, unless function
* mode is disabled.
*/
list_for_each_entry(cnode, &root->val, list) {
struct callchain_cursor_node *node;
@ -339,7 +341,8 @@ append_chain(struct callchain_node *root,
sym = node->sym;
if (cnode->ms.sym && sym) {
if (cnode->ms.sym && sym &&
callchain_param.key == CCKEY_FUNCTION) {
if (cnode->ms.sym->start != sym->start)
break;
} else if (cnode->ip != node->ip)

View File

@ -41,12 +41,18 @@ struct callchain_param;
typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
u64, struct callchain_param *);
/*
 * Key used when comparing callchain entries; chosen with the optional
 * trailing "key" field of the -g/--call-graph option.
 */
enum chain_key {
/* compare on functions: entries with symbols match when the symbol start is equal */
CCKEY_FUNCTION,
/* compare on individual code addresses (the entry's ip) */
CCKEY_ADDRESS
};
/*
 * Callchain display/sorting settings. The order and key fields are filled
 * in from the -g/--call-graph option string by parse_callchain_opt().
 */
struct callchain_param {
/* presumably the call-graph output type (graph, flat, fractal, none) - confirm against enum chain_mode */
enum chain_mode mode;
/* presumably the optional print limit from -g - TODO confirm */
u32 print_limit;
/* minimum percent threshold (the default instance uses 0.5) */
double min_percent;
/* chain sorting callback; see sort_chain_func_t for the signature */
sort_chain_func_t sort;
/* call chain order: ORDER_CALLER or ORDER_CALLEE */
enum chain_order order;
/* comparison key: CCKEY_FUNCTION or CCKEY_ADDRESS */
enum chain_key key;
};
struct callchain_list {

View File

@ -1482,7 +1482,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
char *msg, size_t msgsize)
{
if ((err == ENOENT || err == ENXIO) &&
if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
evsel->attr.type == PERF_TYPE_HARDWARE &&
evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
/*

View File

@ -24,7 +24,8 @@ enum hist_filter {
struct callchain_param callchain_param = {
.mode = CHAIN_GRAPH_REL,
.min_percent = 0.5,
.order = ORDER_CALLEE
.order = ORDER_CALLEE,
.key = CCKEY_FUNCTION
};
u16 hists__col_len(struct hists *hists, enum hist_column col)

View File

@ -261,7 +261,8 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine __maybe_unused,
struct addr_location *al)
struct thread *thread,
struct addr_location *al)
{
struct format_field *field;
static char handler[256];
@ -272,7 +273,6 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused,
int cpu = sample->cpu;
void *data = sample->raw_data;
unsigned long long nsecs = sample->time;
struct thread *thread = al->thread;
char *comm = thread->comm;
dSP;
@ -351,7 +351,8 @@ static void perl_process_event_generic(union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine __maybe_unused,
struct addr_location *al __maybe_unused)
struct thread *thread __maybe_unused,
struct addr_location *al __maybe_unused)
{
dSP;
@ -377,10 +378,11 @@ static void perl_process_event(union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine,
struct addr_location *al)
struct thread *thread,
struct addr_location *al)
{
perl_process_tracepoint(event, sample, evsel, machine, al);
perl_process_event_generic(event, sample, evsel, machine, al);
perl_process_tracepoint(event, sample, evsel, machine, thread, al);
perl_process_event_generic(event, sample, evsel, machine, thread, al);
}
static void run_start_sub(void)

View File

@ -225,6 +225,7 @@ static void python_process_tracepoint(union perf_event *perf_event
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine __maybe_unused,
struct thread *thread,
struct addr_location *al)
{
PyObject *handler, *retval, *context, *t, *obj, *dict = NULL;
@ -238,7 +239,6 @@ static void python_process_tracepoint(union perf_event *perf_event
int cpu = sample->cpu;
void *data = sample->raw_data;
unsigned long long nsecs = sample->time;
struct thread *thread = al->thread;
char *comm = thread->comm;
t = PyTuple_New(MAX_FIELDS);
@ -345,12 +345,12 @@ static void python_process_general_event(union perf_event *perf_event
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine __maybe_unused,
struct thread *thread,
struct addr_location *al)
{
PyObject *handler, *retval, *t, *dict;
static char handler_name[64];
unsigned n = 0;
struct thread *thread = al->thread;
/*
* Use the MAX_FIELDS to make the function expandable, though
@ -404,17 +404,18 @@ static void python_process_event(union perf_event *perf_event,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine,
struct thread *thread,
struct addr_location *al)
{
switch (evsel->attr.type) {
case PERF_TYPE_TRACEPOINT:
python_process_tracepoint(perf_event, sample, evsel,
machine, al);
machine, thread, al);
break;
/* Reserve for future process_hw/sw/raw APIs */
default:
python_process_general_event(perf_event, sample, evsel,
machine, al);
machine, thread, al);
}
}

View File

@ -874,6 +874,8 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_PARENT, "parent", sort_parent),
DIM(SORT_CPU, "cpu", sort_cpu),
DIM(SORT_SRCLINE, "srcline", sort_srcline),
DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
};
#undef DIM
@ -893,8 +895,6 @@ static struct sort_dimension bstack_sort_dimensions[] = {
#define DIM(d, n, func) [d - __SORT_MEMORY_MODE] = { .name = n, .entry = &(func) }
static struct sort_dimension memory_sort_dimensions[] = {
DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym),
DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso),
DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked),

View File

@ -143,6 +143,8 @@ enum sort_type {
SORT_PARENT,
SORT_CPU,
SORT_SRCLINE,
SORT_LOCAL_WEIGHT,
SORT_GLOBAL_WEIGHT,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@ -154,9 +156,7 @@ enum sort_type {
/* memory mode specific sort keys */
__SORT_MEMORY_MODE,
SORT_LOCAL_WEIGHT = __SORT_MEMORY_MODE,
SORT_GLOBAL_WEIGHT,
SORT_MEM_DADDR_SYMBOL,
SORT_MEM_DADDR_SYMBOL = __SORT_MEMORY_MODE,
SORT_MEM_DADDR_DSO,
SORT_MEM_LOCKED,
SORT_MEM_TLB,

View File

@ -39,7 +39,8 @@ static void process_event_unsupported(union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct perf_evsel *evsel __maybe_unused,
struct machine *machine __maybe_unused,
struct addr_location *al __maybe_unused)
struct thread *thread __maybe_unused,
struct addr_location *al __maybe_unused)
{
}

View File

@ -9,6 +9,7 @@ struct machine;
struct perf_sample;
union perf_event;
struct perf_tool;
struct thread;
extern struct pevent *perf_pevent;
@ -68,7 +69,8 @@ struct scripting_ops {
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine,
struct addr_location *al);
struct thread *thread,
struct addr_location *al);
int (*generate_script) (struct pevent *pevent, const char *outfile);
};