perf/core improvements and fixes:

callchains:
 
    Alexey Budankov:
 
   - Allow collecting LBR together with DWARF callchains, for workloads
     where the userspace stack size collected is not big enough for
     pure DWARF based unwinding.
 
   - Dump the LBR call stack in 'perf report -D'.
 
 perf top:
 
   Arnaldo Carvalho de Melo:
 
   - Show visual cue at start to state that the minimal set of samples
     are being collected prior to sorting/bucketizing/displaying.
 
 CoreSight (ARM hardware tracing):
 
   Leo Yan:
 
   - Support sample flags 'insn' and 'insnlen'.
 
 core:
 
   Adrian Hunter:
 
   - Add comment for 'idx' member in 'struct perf_sample_id.
 
 tools headers:
 
   Arnaldo Carvalho de Melo:
 
   - Synchronize linux/bits.h, which required grabbing a copy of the kernel
     const.h headers and some changes in the ordering of header directories.
 
   - Sync x86's asm/cpufeatures.h with the with the kernel, no change in
     any of the tools.
 
 libperf:
 
   Jiri Olsa:
 
   - Fix arch include paths.
 
 libtraceevent:
 
   Steven Rostedt (VMware):
 
   - Fix "robust" test of do_generate_dynamic_list_file.
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCXVxFFAAKCRCyPKLppCJ+
 JwebAQCp3nc/q4rpE6v9W8b1uMAsmELl5q8D+74/1ZWT4dqPFAEA9ZmtsIi/7ZjV
 Dq3403hFUfk1kDacEvUXKgzi3PMq+Qc=
 =DK9x
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-mingo-5.4-20190820' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

callchains:

   Alexey Budankov:

  - Allow collecting LBR together with DWARF callchains, for workloads
    where the userspace stack size collected is not big enough for
    pure DWARF based unwinding.

  - Dump the LBR call stack in 'perf report -D'.

perf top:

  Arnaldo Carvalho de Melo:

  - Show visual cue at start to state that the minimal set of samples
    are being collected prior to sorting/bucketizing/displaying.

CoreSight (ARM hardware tracing):

  Leo Yan:

  - Support sample flags 'insn' and 'insnlen'.

core:

  Adrian Hunter:

  - Add comment for 'idx' member in 'struct perf_sample_id.

tools headers:

  Arnaldo Carvalho de Melo:

  - Synchronize linux/bits.h, which required grabbing a copy of the kernel
    const.h headers and some changes in the ordering of header directories.

  - Sync x86's asm/cpufeatures.h with the with the kernel, no change in
    any of the tools.

libperf:

  Jiri Olsa:

  - Fix arch include paths.

libtraceevent:

  Steven Rostedt (VMware):

  - Fix "robust" test of do_generate_dynamic_list_file.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2019-08-20 21:38:22 +02:00
commit 4e92b18e5b
20 changed files with 166 additions and 44 deletions

View File

@ -281,6 +281,8 @@
#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
@ -394,5 +396,6 @@
#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
#endif /* _ASM_X86_CPUFEATURES_H */

View File

@ -3,6 +3,7 @@
#define _TOOLS_LINUX_BITOPS_H_
#include <asm/types.h>
#include <limits.h>
#ifndef __WORDSIZE
#define __WORDSIZE (__SIZEOF_LONG__ * 8)
#endif

View File

@ -1,13 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_BITS_H
#define __LINUX_BITS_H
#include <linux/const.h>
#include <asm/bitsperlong.h>
#define BIT(nr) (1UL << (nr))
#define BIT_ULL(nr) (1ULL << (nr))
#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
#define BIT(nr) (UL(1) << (nr))
#define BIT_ULL(nr) (ULL(1) << (nr))
#define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG))
#define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG))
#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG)
#define BITS_PER_BYTE 8
@ -17,10 +19,11 @@
* GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
*/
#define GENMASK(h, l) \
(((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
(((~UL(0)) - (UL(1) << (l)) + 1) & \
(~UL(0) >> (BITS_PER_LONG - 1 - (h))))
#define GENMASK_ULL(h, l) \
(((~0ULL) - (1ULL << (l)) + 1) & \
(~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
(((~ULL(0)) - (ULL(1) << (l)) + 1) & \
(~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
#endif /* __LINUX_BITS_H */

View File

@ -0,0 +1,9 @@
#ifndef _LINUX_CONST_H
#define _LINUX_CONST_H
#include <uapi/linux/const.h>
#define UL(x) (_UL(x))
#define ULL(x) (_ULL(x))
#endif /* _LINUX_CONST_H */

View File

@ -1,22 +1,22 @@
/* SPDX-License-Identifier: GPL-2.0 */
#if defined(__i386__) || defined(__x86_64__)
#include "../../arch/x86/include/uapi/asm/bitsperlong.h"
#include "../../../arch/x86/include/uapi/asm/bitsperlong.h"
#elif defined(__aarch64__)
#include "../../arch/arm64/include/uapi/asm/bitsperlong.h"
#include "../../../arch/arm64/include/uapi/asm/bitsperlong.h"
#elif defined(__powerpc__)
#include "../../arch/powerpc/include/uapi/asm/bitsperlong.h"
#include "../../../arch/powerpc/include/uapi/asm/bitsperlong.h"
#elif defined(__s390__)
#include "../../arch/s390/include/uapi/asm/bitsperlong.h"
#include "../../../arch/s390/include/uapi/asm/bitsperlong.h"
#elif defined(__sparc__)
#include "../../arch/sparc/include/uapi/asm/bitsperlong.h"
#include "../../../arch/sparc/include/uapi/asm/bitsperlong.h"
#elif defined(__mips__)
#include "../../arch/mips/include/uapi/asm/bitsperlong.h"
#include "../../../arch/mips/include/uapi/asm/bitsperlong.h"
#elif defined(__ia64__)
#include "../../arch/ia64/include/uapi/asm/bitsperlong.h"
#include "../../../arch/ia64/include/uapi/asm/bitsperlong.h"
#elif defined(__riscv)
#include "../../arch/riscv/include/uapi/asm/bitsperlong.h"
#include "../../../arch/riscv/include/uapi/asm/bitsperlong.h"
#elif defined(__alpha__)
#include "../../arch/alpha/include/uapi/asm/bitsperlong.h"
#include "../../../arch/alpha/include/uapi/asm/bitsperlong.h"
#else
#include <asm-generic/bitsperlong.h>
#endif

View File

@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* const.h: Macros for dealing with constants. */
#ifndef _UAPI_LINUX_CONST_H
#define _UAPI_LINUX_CONST_H
/* Some constant macros are used in both assembler and
* C code. Therefore we cannot annotate them always with
* 'UL' and other type specifiers unilaterally. We
* use the following macros to deal with this.
*
* Similarly, _AT() will cast an expression with a type in C, but
* leave it unchanged in asm.
*/
#ifdef __ASSEMBLY__
#define _AC(X,Y) X
#define _AT(T,X) X
#else
#define __AC(X,Y) (X##Y)
#define _AC(X,Y) __AC(X,Y)
#define _AT(T,X) ((T)(X))
#endif
#define _UL(x) (_AC(x, UL))
#define _ULL(x) (_AC(x, ULL))
#define _BITUL(x) (_UL(1) << (x))
#define _BITULL(x) (_ULL(1) << (x))
#endif /* _UAPI_LINUX_CONST_H */

View File

@ -266,8 +266,8 @@ endef
define do_generate_dynamic_list_file
symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \
xargs echo "U W w" | tr ' ' '\n' | sort -u | xargs echo`;\
if [ "$$symbol_type" = "U W w" ];then \
xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
if [ "$$symbol_type" = "U W" ];then \
(echo '{'; \
$(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\
echo '};'; \

View File

@ -280,9 +280,9 @@ endif
INC_FLAGS += -I$(src-perf)/lib/include
INC_FLAGS += -I$(src-perf)/util/include
INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
INC_FLAGS += -I$(srctree)/tools/include/uapi
INC_FLAGS += -I$(srctree)/tools/include/
INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi
INC_FLAGS += -I$(srctree)/tools/include/uapi
INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/
INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/

View File

@ -1281,6 +1281,8 @@ int cmd_report(int argc, const char **argv)
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
if (perf_evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER)
has_br_stack = false;
setup_forced_leader(&report, session->evlist);

View File

@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
HEADERS='
include/uapi/linux/const.h
include/uapi/drm/drm.h
include/uapi/drm/i915_drm.h
include/uapi/linux/fadvise.h
@ -19,6 +20,7 @@ include/uapi/linux/usbdevice_fs.h
include/uapi/linux/vhost.h
include/uapi/sound/asound.h
include/linux/bits.h
include/linux/const.h
include/linux/hash.h
include/uapi/linux/hw_breakpoint.h
arch/x86/include/asm/disabled-features.h

View File

@ -59,7 +59,7 @@ else
CFLAGS := -g -Wall
endif
INCLUDES = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/ -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
INCLUDES = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/ -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
# Append required CFLAGS
override CFLAGS += $(EXTRA_WARNINGS)

View File

@ -347,6 +347,8 @@ static int __ui_browser__refresh(struct ui_browser *browser)
SLsmg_fill_region(browser->y + row + browser->extra_title_lines, browser->x,
browser->rows - row, width, ' ');
if (browser->nr_entries == 0 && browser->no_samples_msg)
__ui__info_window(NULL, browser->no_samples_msg, NULL);
return 0;
}

View File

@ -23,6 +23,7 @@ struct ui_browser {
void *priv;
const char *title;
char *helpline;
const char *no_samples_msg;
void (*refresh_dimensions)(struct ui_browser *browser);
unsigned int (*refresh)(struct ui_browser *browser);
void (*write)(struct ui_browser *browser, void *entry, int row);

View File

@ -2894,6 +2894,9 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
if (symbol_conf.col_width_list_str)
perf_hpp__set_user_width(symbol_conf.col_width_list_str);
if (!is_report_browser(hbt))
browser->b.no_samples_msg = "Collecting samples...";
while (1) {
struct thread *thread = NULL;
struct map *map = NULL;

View File

@ -162,8 +162,7 @@ int ui_browser__input_window(const char *title, const char *text, char *input,
return key;
}
int ui__question_window(const char *title, const char *text,
const char *exit_msg, int delay_secs)
void __ui__info_window(const char *title, const char *text, const char *exit_msg)
{
int x, y;
int max_len = 0, nr_lines = 0;
@ -185,10 +184,10 @@ int ui__question_window(const char *title, const char *text,
t = sep + 1;
}
pthread_mutex_lock(&ui__lock);
max_len += 2;
nr_lines += 4;
nr_lines += 2;
if (exit_msg)
nr_lines += 2;
y = SLtt_Screen_Rows / 2 - nr_lines / 2,
x = SLtt_Screen_Cols / 2 - max_len / 2;
@ -199,18 +198,34 @@ int ui__question_window(const char *title, const char *text,
SLsmg_write_string((char *)title);
}
SLsmg_gotorc(++y, x);
nr_lines -= 2;
if (exit_msg)
nr_lines -= 2;
max_len -= 2;
SLsmg_write_wrapped_string((unsigned char *)text, y, x,
nr_lines, max_len, 1);
SLsmg_gotorc(y + nr_lines - 2, x);
SLsmg_write_nstring((char *)" ", max_len);
SLsmg_gotorc(y + nr_lines - 1, x);
SLsmg_write_nstring((char *)exit_msg, max_len);
if (exit_msg) {
SLsmg_gotorc(y + nr_lines - 2, x);
SLsmg_write_nstring((char *)" ", max_len);
SLsmg_gotorc(y + nr_lines - 1, x);
SLsmg_write_nstring((char *)exit_msg, max_len);
}
}
void ui__info_window(const char *title, const char *text)
{
pthread_mutex_lock(&ui__lock);
__ui__info_window(title, text, NULL);
SLsmg_refresh();
pthread_mutex_unlock(&ui__lock);
}
int ui__question_window(const char *title, const char *text,
const char *exit_msg, int delay_secs)
{
pthread_mutex_lock(&ui__lock);
__ui__info_window(title, text, exit_msg);
SLsmg_refresh();
pthread_mutex_unlock(&ui__lock);
return ui__getch(delay_secs);
}

View File

@ -8,6 +8,8 @@ int ui__getch(int delay_secs);
int ui__popup_menu(int argc, char * const argv[]);
int ui__help_window(const char *text);
int ui__dialog_yesno(const char *msg);
void __ui__info_window(const char *title, const char *text, const char *exit_msg);
void ui__info_window(const char *title, const char *text);
int ui__question_window(const char *title, const char *text,
const char *exit_msg, int delay_secs);

View File

@ -1076,6 +1076,35 @@ bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
return !!etmq->etm->timeless_decoding;
}
static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
u64 trace_chan_id,
const struct cs_etm_packet *packet,
struct perf_sample *sample)
{
/*
* It's pointless to read instructions for the CS_ETM_DISCONTINUITY
* packet, so directly bail out with 'insn_len' = 0.
*/
if (packet->sample_type == CS_ETM_DISCONTINUITY) {
sample->insn_len = 0;
return;
}
/*
* T32 instruction size might be 32-bit or 16-bit, decide by calling
* cs_etm__t32_instr_size().
*/
if (packet->isa == CS_ETM_ISA_T32)
sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
sample->ip);
/* Otherwise, A64 and A32 instruction size are always 32-bit. */
else
sample->insn_len = 4;
cs_etm__mem_access(etmq, trace_chan_id, sample->ip,
sample->insn_len, (void *)sample->insn);
}
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
struct cs_etm_traceid_queue *tidq,
u64 addr, u64 period)
@ -1097,9 +1126,10 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
sample.period = period;
sample.cpu = tidq->packet->cpu;
sample.flags = tidq->prev_packet->flags;
sample.insn_len = 1;
sample.cpumode = event->sample.header.misc;
cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
if (etm->synth_opts.last_branch) {
cs_etm__copy_last_branch_rb(etmq, tidq);
sample.branch_stack = tidq->last_branch;
@ -1159,6 +1189,9 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
sample.flags = tidq->prev_packet->flags;
sample.cpumode = event->sample.header.misc;
cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
&sample);
/*
* perf report cannot handle events without a branch stack
*/

View File

@ -23,6 +23,13 @@ struct perf_sample_id {
struct hlist_node node;
u64 id;
struct evsel *evsel;
/*
* 'idx' will be used for AUX area sampling. A sample will have AUX area
* data that will be queued for decoding, where there are separate
* queues for each CPU (per-cpu tracing) or task (per-thread tracing).
* The sample ID can be used to lookup 'idx' which is effectively the
* queue number.
*/
int idx;
int cpu;
pid_t tid;

View File

@ -30,6 +30,7 @@ static const struct branch_mode branch_modes[] = {
BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP),
BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL),
BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE),
BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
BRANCH_END
};

View File

@ -1051,23 +1051,30 @@ static void callchain__printf(struct evsel *evsel,
i, callchain->ips[i]);
}
static void branch_stack__printf(struct perf_sample *sample)
static void branch_stack__printf(struct perf_sample *sample, bool callstack)
{
uint64_t i;
printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
printf("%s: nr:%" PRIu64 "\n",
!callstack ? "... branch stack" : "... branch callstack",
sample->branch_stack->nr);
for (i = 0; i < sample->branch_stack->nr; i++) {
struct branch_entry *e = &sample->branch_stack->entries[i];
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
i, e->from, e->to,
(unsigned short)e->flags.cycles,
e->flags.mispred ? "M" : " ",
e->flags.predicted ? "P" : " ",
e->flags.abort ? "A" : " ",
e->flags.in_tx ? "T" : " ",
(unsigned)e->flags.reserved);
if (!callstack) {
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
i, e->from, e->to,
(unsigned short)e->flags.cycles,
e->flags.mispred ? "M" : " ",
e->flags.predicted ? "P" : " ",
e->flags.abort ? "A" : " ",
e->flags.in_tx ? "T" : " ",
(unsigned)e->flags.reserved);
} else {
printf("..... %2"PRIu64": %016" PRIx64 "\n",
i, i > 0 ? e->from : e->to);
}
}
}
@ -1217,8 +1224,8 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
if (evsel__has_callchain(evsel))
callchain__printf(evsel, sample);
if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel))
branch_stack__printf(sample);
if (sample_type & PERF_SAMPLE_BRANCH_STACK)
branch_stack__printf(sample, perf_evsel__has_branch_callstack(evsel));
if (sample_type & PERF_SAMPLE_REGS_USER)
regs_user__printf(sample);