From 6e98bc349ea4219e21785d85809b10bd49e722df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Aug 2019 12:01:38 -0300 Subject: [PATCH 01/17] tools headers: Add limits.h to access __WORDSIZE We need to make sure limits.h is included before checking if we can use __WORDSIZE, do it. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-5yfoed4rnsck2n3cwhm9mvth@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/bitops.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h index 0b0ef3abc966..140c8362f113 100644 --- a/tools/include/linux/bitops.h +++ b/tools/include/linux/bitops.h @@ -3,6 +3,7 @@ #define _TOOLS_LINUX_BITOPS_H_ #include +#include #ifndef __WORDSIZE #define __WORDSIZE (__SIZEOF_LONG__ * 8) #endif From 146dc303630aff5fdf006614a67704539c519c33 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Aug 2019 11:11:30 -0300 Subject: [PATCH 02/17] perf tools: tools/include should come before tools/uapi/include The next cset will grap const.h copies from the kernel to keep bits.h in sync as it started to use linux/const.h, that in turn includes uapi/linux/const.h. So now we have a file with the same name in tools/include and tools/uapi/include, and one includes the other, we need to have tools/include/uapi/ after tools/include/ for this to work, fix it. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-qzjqxa1wdrt51kwadyqawnuj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 9a06787fedc6..bf8caa7d17f6 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -280,9 +280,9 @@ endif INC_FLAGS += -I$(src-perf)/lib/include INC_FLAGS += -I$(src-perf)/util/include INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include -INC_FLAGS += -I$(srctree)/tools/include/uapi INC_FLAGS += -I$(srctree)/tools/include/ INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi +INC_FLAGS += -I$(srctree)/tools/include/uapi INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/ INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/ From aaa6ef8aa85f33c6dd593e139b7f7d901bbde0e2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Aug 2019 11:00:54 -0300 Subject: [PATCH 03/17] tools headers: Grab copy of linux/const.h, needed by linux/bits.h So that can update the copy of linux/bits.h that now uses macros defined in const.h and that are not available in older systems. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-c2qfcbl58hxyfb5u5xivp7is@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/const.h | 9 +++++++++ tools/include/uapi/linux/const.h | 31 +++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 2 ++ 3 files changed, 42 insertions(+) create mode 100644 tools/include/linux/const.h create mode 100644 tools/include/uapi/linux/const.h diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h new file mode 100644 index 000000000000..7b55a55f5911 --- /dev/null +++ b/tools/include/linux/const.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_CONST_H +#define _LINUX_CONST_H + +#include + +#define UL(x) (_UL(x)) +#define ULL(x) (_ULL(x)) + +#endif /* _LINUX_CONST_H */ diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h new file mode 100644 index 000000000000..5ed721ad5b19 --- /dev/null +++ b/tools/include/uapi/linux/const.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* const.h: Macros for dealing with constants. */ + +#ifndef _UAPI_LINUX_CONST_H +#define _UAPI_LINUX_CONST_H + +/* Some constant macros are used in both assembler and + * C code. Therefore we cannot annotate them always with + * 'UL' and other type specifiers unilaterally. We + * use the following macros to deal with this. + * + * Similarly, _AT() will cast an expression with a type in C, but + * leave it unchanged in asm. + */ + +#ifdef __ASSEMBLY__ +#define _AC(X,Y) X +#define _AT(T,X) X +#else +#define __AC(X,Y) (X##Y) +#define _AC(X,Y) __AC(X,Y) +#define _AT(T,X) ((T)(X)) +#endif + +#define _UL(x) (_AC(x, UL)) +#define _ULL(x) (_AC(x, ULL)) + +#define _BITUL(x) (_UL(1) << (x)) +#define _BITULL(x) (_ULL(1) << (x)) + +#endif /* _UAPI_LINUX_CONST_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index f211c015cb76..5308b3836278 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 HEADERS=' +include/uapi/linux/const.h include/uapi/drm/drm.h include/uapi/drm/i915_drm.h include/uapi/linux/fadvise.h @@ -19,6 +20,7 @@ include/uapi/linux/usbdevice_fs.h include/uapi/linux/vhost.h include/uapi/sound/asound.h include/linux/bits.h +include/linux/const.h include/linux/hash.h include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/disabled-features.h From b658911731d41a91b9de5458d6f38d6e1c90a453 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Aug 2019 11:14:28 -0300 Subject: [PATCH 04/17] tools headers: Synchronize linux/bits.h with the kernel sources To pick up the changes in this cset: 95b980d62d52 ("linux/bits.h: make BIT(), GENMASK(), and friends available in assembly") To address this tools/perf build warning: Warning: Kernel ABI header at 'tools/include/linux/bits.h' differs from latest version at 'include/linux/bits.h' diff -u tools/include/linux/bits.h include/linux/bits.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Masahiro Yamada Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-1if3iga5r3di6oyddgxsr225@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/bits.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 2b7b532c1d51..669d69441a62 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -1,13 +1,15 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_BITS_H #define __LINUX_BITS_H + +#include #include -#define BIT(nr) (1UL << (nr)) -#define BIT_ULL(nr) (1ULL << (nr)) -#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BIT(nr) (UL(1) << (nr)) +#define BIT_ULL(nr) (ULL(1) << (nr)) +#define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG)) +#define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) #define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) #define BITS_PER_BYTE 8 @@ -17,10 +19,11 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #define GENMASK(h, l) \ - (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + (((~UL(0)) - (UL(1) << (l)) + 1) & \ + (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) #define GENMASK_ULL(h, l) \ - (((~0ULL) - (1ULL << (l)) + 1) & \ - (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) + (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ + (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) #endif /* __LINUX_BITS_H */ From 0ac10d87a571ae7d68c7f70f1c2229388f1dce9e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Aug 2019 10:53:20 -0300 Subject: [PATCH 05/17] tools arch x86: Sync asm/cpufeatures.h with the with the kernel To pick up the changes in: f36cf386e3fe ("x86/speculation/swapgs: Exclude ATOMs from speculation through SWAPGS") 18ec54fdd6d1 ("x86/speculation: Prepare entry code for Spectre v1 swapgs mitigations") That don't affect anything in tools/. This silences this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Thomas Gleixner Link: https://lkml.kernel.org/n/tip-860dq1qie2cpnfghlpcnxrzr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 998c2cc08363..e880f2408e29 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -281,6 +281,8 @@ #define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ #define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -394,5 +396,6 @@ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ #endif /* _ASM_X86_CPUFEATURES_H */ From 3c84e65a533dbaa1a29bfd847deca73704b675eb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 12 Aug 2019 12:09:35 +0300 Subject: [PATCH 06/17] perf evsel: Add comment for 'idx' member in 'struct perf_sample_id The 'idx' member was added as preparation for AUX area sampling. Add a comment to describe why. Signed-off-by: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/83ff264f-84c3-5372-8976-dd9293d20c6f@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 9cd6e3ae479a..efe08065838f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -23,6 +23,13 @@ struct perf_sample_id { struct hlist_node node; u64 id; struct evsel *evsel; + /* + * 'idx' will be used for AUX area sampling. A sample will have AUX area + * data that will be queued for decoding, where there are separate + * queues for each CPU (per-cpu tracing) or task (per-thread tracing). + * The sample ID can be used to lookup 'idx' which is effectively the + * queue number. + */ int idx; int cpu; pid_t tid; From 82a2f88458d70704be843961e10b5cef9a6e95d3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 5 Aug 2019 13:01:50 -0400 Subject: [PATCH 07/17] tools lib traceevent: Fix "robust" test of do_generate_dynamic_list_file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tools/lib/traceevent/Makefile had a test added to it to detect a failure of the "nm" when making the dynamic list file (whatever that is). The problem is that the test sorts the values "U W w" and some versions of sort will place "w" ahead of "W" (even though it has a higher ASCII value, and break the test. Add 'tr "w" "W"' to merge the two and not worry about the ordering. Reported-by: Tzvetomir Stoyanov Signed-off-by: Steven Rostedt (VMware) Cc: Alexander Shishkin Cc: David Carrillo-Cisneros Cc: He Kuang Cc: Jiri Olsa Cc: Michal rarek Cc: Paul Turner Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Uwe Kleine-König Cc: Wang Nan Cc: stable@vger.kernel.org Fixes: 6467753d61399 ("tools lib traceevent: Robustify do_generate_dynamic_list_file") Link: http://lkml.kernel.org/r/20190805130150.25acfeb1@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 3292c290654f..8352d53dcb5a 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -266,8 +266,8 @@ endef define do_generate_dynamic_list_file symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \ - xargs echo "U W w" | tr ' ' '\n' | sort -u | xargs echo`;\ - if [ "$$symbol_type" = "U W w" ];then \ + xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\ + if [ "$$symbol_type" = "U W" ];then \ (echo '{'; \ $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\ echo '};'; \ From 2566349648b40aa3f5edf6af8f7f893ccd6e4eae Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Fri, 9 Aug 2019 18:23:58 +0300 Subject: [PATCH 08/17] perf record: Enable LBR callstack capture jointly with thread stack Enable '-j stack' applicability together with '--call-graph dwarf' option so thread stack data and LBR call stack could be captured jointly: $ perf record -g --call-graph dwarf,1024 -j stack,u -- stack_test Collected LBR call stack can be used to augment DWARF call stack calculated from the raw thread stack data and to provide more comprehensive call stack information for cases when collected SIZE is not enough to cover complete thread stack. Such cases are typical for workloads that allocate large arrays of data on its threads stacks or the possible SIZE to collect can't be large enough due to workload nature or system configuration and this is where hardware captured LBR call stacks can provide missing stack frames. Possible DWARF plus LBR call stacks consolidation algorithm description follows. With this patch set perf report command UI currently ignores collected LBR call stack data and still provides DWARF based call stacks information. =========================================================================== Overview: Legend: THS - thread stack CTX - thread register context SWS - software stack SSF - skipped stack frames PSS - Perf sample stack ip,sp,bp - HW registers values d - allocated stack regions kip - ip address in the kernel space K - captured thread stack size THS ----- | |<-stack bottom ... |---| |ip4| |---| PSS = SWS(THS(K)) | | --> | | | |d3 | user/ | |---| user PSS kernel PSS | |ip3| ------ ------ | |---| |SSF | |SSF | | | | .... .... | | | ------ ------ | |d2 | | -1 | | -1 | |---| user ------ ------ K |ip2| CTX |ip3 | |ip3 | |---| |----| |----| | |d1 | ... |ip2 | , |ip2 | | |---| |---| |----| |----| | |ip1| |bp0| |ip1 | |ip1 | | |---| |---| |----| |----| | | | |ip0|->|ip0 | |ip0 |<-user stack top | | | |---| ------ ------ | | |<-|sp0|<-stack |kip0|<-kernel stack bottom --> ----- ----- top |----| |kip1| |----| |kip2| |----| .... | |<-kernel stack top ------ Algorithm details: Legend: HWS - hardware stack K-SWS - kernel software stack BRANCH TABLE HWS ip ip from to ------ ----------- |ip7`| |ip7`| | |----| |----|----| |ip6`| |ip6`| | user PSS |----| |----|----| |ip5`| |ip5`| | ------ |----| |----|----| | -1 | |ip4`| |ip4`| | ------ |----| |----|----| |ip3 |~~~|ip3`| |ip3`| | |----| |----| |----|----| |ip2 |~~~|ip2`| |ip2`| | |----| |----| |----|----| |ip1 |~~~|ip1`| |ip1`|ip0`| |----| |----| ----------- |ip0 |~~~|ip0`|<---------' ------ ------ 1. if (sym(ipj) == sym(ipj`)), j=0-3 ===> user PSS 2. ipj` , j=4-7 ===> user PSS Augmented PSS = A_SWS(SWS(THS(K)), HWS): user/ user PSS kernel PSS ------ ------ |ip7`| |ip7`|<-user PSS bottom |----| |----| |ip6`| |ip6`| |----| |----| HWS |ip5`| |ip5`| |----| |----| |ip4`| |ip4`| ------ ------ |ip3 | |ip3 | |----| |----| SWS |ip2 | |ip2 | |----| |----| |ip1 | |ip1 | |----| |----| |ip0 | |ip0 |<-user PSS top ------ ------ |kip0|<-kernel PSS bottom |----| |kip1| K-SWS |----| |kip2| |----| |kip3|<-kernel PSS top ------ APSS Committer testing: Before: # perf record -g --call-graph dwarf,1024 -j stack,u ls > /dev/null unknown branch filter stack, check man page Usage: perf record [] [] or: perf record [] -- [] -j, --branch-filter branch stack filter modes # perf record -g --call-graph dwarf,1024 -j u ls > /dev/null [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.054 MB perf.data (12 samples) ] # perf evlist -v cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CALLCHAIN|PERIOD|BRANCH_STACK|REGS_USER|STACK_USER|DATA_SRC, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: ANY, sample_regs_user: 0xff0fff, sample_stack_user: 1024 # After: # perf record -g --call-graph dwarf,1024 -j stack,u ls > /dev/null [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.044 MB perf.data (11 samples) ] [root@quaco ~]# perf evlist -v cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CALLCHAIN|PERIOD|BRANCH_STACK|REGS_USER|STACK_USER|DATA_SRC, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: USER|CALL_STACK, sample_regs_user: 0xff0fff, sample_stack_user: 1024 # Signed-off-by: Alexey Budankov Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/e9e00090-66fb-d2a4-c90f-1d12344f7788@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-branch-options.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index 726e8d9e8c54..4ed20c833d44 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -30,6 +30,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP), BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL), BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE), + BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), BRANCH_END }; From d2720c3dad58def723f9617f7cf2a48c752ef50a Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Fri, 9 Aug 2019 18:26:30 +0300 Subject: [PATCH 09/17] perf report: Dump LBR callstack data by -D jointly with thread stack Make perf report -D command print captured LBR callstack chain when it is collected together with raw thread stack data: 2752673087247083 0x5d10 [0x548]: PERF_RECORD_SAMPLE(IP, 0x4002): 5841/5841: 0x40121f period: 1543862 addr: 0 ... FP chain: nr:0 ... branch callstack: nr:3 ..... 0: 00000000004011d0 ..... 1: 00007f393c388411 ..... 2: 0000000000401098 ... user regs: mask 0xff0fff ABI 64-bit .... AX 0x34e7 .... BX 0x7fff5f6dd3c0 .... CX 0xffffffff .... DX 0x34e6 .... SI 0x7f393c5268d0 .... DI 0x0 .... BP 0x401260 .... SP 0x7fff5f6dd3c0 .... IP 0x40121f .... FLAGS 0x29f .... CS 0x33 .... SS 0x2b .... R8 0x7f393c526800 .... R9 0x7f393c525da0 .... R10 0xfffffffffffff70a .... R11 0x246 .... R12 0x401070 .... R13 0x7fff5f6ddcb0 .... R14 0x0 .... R15 0x0 ... ustack: size 1024, offset 0x130 . data_src: 0x5080021 ... thread: stack_test:5841 ...... dso: /root/abudanko/stacks/stack_test Committer testing: # perf record -g --call-graph dwarf,1024 -j stack,u ls > /dev/null [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.042 MB perf.data (10 samples) ] # Before: # perf report -D |& grep PERF_RECORD_SAMPLE -A28 | tail -29 67538909824483 0xa7a0 [0x560]: PERF_RECORD_SAMPLE(IP, 0x4002): 9721/9721: 0x7f441b2b1e20 period: 1376095 addr: 0 ... FP chain: nr:0 ... user regs: mask 0xff0fff ABI 64-bit .... AX 0x7f441b2b1000 .... BX 0x7f441b55b970 .... CX 0x7fff6e2db218 .... DX 0x7fff6e2db218 .... SI 0x7fff6e2db208 .... DI 0x1 .... BP 0x1 .... SP 0x7fff6e2db178 .... IP 0x7f441b2b1e20 .... FLAGS 0x20a .... CS 0x33 .... SS 0x2b .... R8 0x1 .... R9 0x7f441b371c18 .... R10 0x7f441b5a5f10 .... R11 0x202 .... R12 0x7fff6e2db208 .... R13 0x7fff6e2db218 .... R14 0x7f441b5a7150 .... R15 0x0 ... ustack: size 1024, offset 0x148 . data_src: 0x5080021 ... thread: ls:9721 ...... dso: /usr/lib64/libpthread-2.29.so 0xad00 [0x60]: event: 10 # After: # perf report -D |& grep PERF_RECORD_SAMPLE -A31 | tail -32 67538909824483 0xa7a0 [0x560]: PERF_RECORD_SAMPLE(IP, 0x4002): 9721/9721: 0x7f441b2b1e20 period: 1376095 addr: 0 ... FP chain: nr:0 ... branch callstack: nr:4 ..... 0: 00007f441b2b1e20 ..... 1: 00007f441b58af1a ..... 2: 00007f441b58b0e1 ..... 3: 00007f441b57c145 ... user regs: mask 0xff0fff ABI 64-bit .... AX 0x7f441b2b1000 .... BX 0x7f441b55b970 .... CX 0x7fff6e2db218 .... DX 0x7fff6e2db218 .... SI 0x7fff6e2db208 .... DI 0x1 .... BP 0x1 .... SP 0x7fff6e2db178 .... IP 0x7f441b2b1e20 .... FLAGS 0x20a .... CS 0x33 .... SS 0x2b .... R8 0x1 .... R9 0x7f441b371c18 .... R10 0x7f441b5a5f10 .... R11 0x202 .... R12 0x7fff6e2db208 .... R13 0x7fff6e2db218 .... R14 0x7f441b5a7150 .... R15 0x0 ... ustack: size 1024, offset 0x148 . data_src: 0x5080021 ... thread: ls:9721 ...... dso: /usr/lib64/libpthread-2.29.so # Signed-off-by: Alexey Budankov Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/aa82e5dd-def2-0ca8-a064-db9e2e8ad076@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b9fe71d11bf6..82e0438a9160 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1051,23 +1051,30 @@ static void callchain__printf(struct evsel *evsel, i, callchain->ips[i]); } -static void branch_stack__printf(struct perf_sample *sample) +static void branch_stack__printf(struct perf_sample *sample, bool callstack) { uint64_t i; - printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); + printf("%s: nr:%" PRIu64 "\n", + !callstack ? "... branch stack" : "... branch callstack", + sample->branch_stack->nr); for (i = 0; i < sample->branch_stack->nr; i++) { struct branch_entry *e = &sample->branch_stack->entries[i]; - printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", - i, e->from, e->to, - (unsigned short)e->flags.cycles, - e->flags.mispred ? "M" : " ", - e->flags.predicted ? "P" : " ", - e->flags.abort ? "A" : " ", - e->flags.in_tx ? "T" : " ", - (unsigned)e->flags.reserved); + if (!callstack) { + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", + i, e->from, e->to, + (unsigned short)e->flags.cycles, + e->flags.mispred ? "M" : " ", + e->flags.predicted ? "P" : " ", + e->flags.abort ? "A" : " ", + e->flags.in_tx ? "T" : " ", + (unsigned)e->flags.reserved); + } else { + printf("..... %2"PRIu64": %016" PRIx64 "\n", + i, i > 0 ? e->from : e->to); + } } } @@ -1217,8 +1224,8 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (evsel__has_callchain(evsel)) callchain__printf(evsel, sample); - if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel)) - branch_stack__printf(sample); + if (sample_type & PERF_SAMPLE_BRANCH_STACK) + branch_stack__printf(sample, perf_evsel__has_branch_callstack(evsel)); if (sample_type & PERF_SAMPLE_REGS_USER) regs_user__printf(sample); From 10ccbc1cc0b8a05a5c8491630d36d1e2672036c1 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Fri, 9 Aug 2019 18:31:28 +0300 Subject: [PATCH 10/17] perf report: Prefer DWARF callstacks to LBR ones when captured both Display DWARF based callchains when the perf.data file contains raw thread stack data as LBR callstack data. Commiter testing: This changes the output from the branch stack based one, i.e. without this patch, for the same file as in the previous csets: # perf report --stdio # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 13 of event 'cycles' # Event count (approx.): 13 # # Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles # ........ ....... .................... ........................... ......................................... .................. # 7.69% ls libpthread-2.29.so [.] _init [.] __pthread_initialize_minimal_internal 6827 7.69% ls ld-2.29.so [k] _start [k] _dl_start - 7.69% ls ld-2.29.so [.] _dl_start_user [.] _dl_init -24790 7.69% ls ld-2.29.so [k] _dl_start [k] _dl_sysdep_start 278 7.69% ls ld-2.29.so [k] dl_main [k] _dl_map_object_deps 15581 7.69% ls ld-2.29.so [k] open_verify.constprop.0 [k] lseek64 4228 7.69% ls ld-2.29.so [k] _dl_map_object [k] open_verify.constprop.0 55 7.69% ls ld-2.29.so [k] openaux [k] _dl_map_object 67 7.69% ls ld-2.29.so [k] _dl_map_object_deps [k] 0x00007f441b57c090 112 7.69% ls ld-2.29.so [.] call_init.part.0 [.] _init 334 7.69% ls ld-2.29.so [.] _dl_init [.] call_init.part.0 383 7.69% ls ld-2.29.so [k] _dl_sysdep_start [k] dl_main 45 7.69% ls ld-2.29.so [k] _dl_catch_exception [k] openaux 116 # # (Tip: For memory address profiling, try: perf mem record / perf mem report) # To the one that shows call chains: # perf report --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 10 of event 'cycles' # Event count (approx.): 3204047 # # Children Self Command Shared Object Symbol # ........ ........ ....... .................. ......................................... # 55.01% 0.00% ls [kernel.vmlinux] [k] entry_SYSCALL_64_after_hwframe | ---entry_SYSCALL_64_after_hwframe do_syscall_64 | --16.01%--__x64_sys_execve __do_execve_file.isra.0 search_binary_handler load_elf_binary elf_map vm_mmap_pgoff do_mmap mmap_region perf_event_mmap perf_iterate_sb perf_iterate_ctx perf_event_mmap_output perf_output_copy memcpy_erms 55.01% 39.00% ls [kernel.vmlinux] [k] do_syscall_64 | |--39.00%--0xffffffffffffffff | _dl_map_object | open_verify.constprop.0 | __lseek64 (inlined) | entry_SYSCALL_64_after_hwframe | do_syscall_64 | --16.01%--do_syscall_64 __x64_sys_execve __do_execve_file.isra.0 search_binary_handler load_elf_binary elf_map vm_mmap_pgoff do_mmap mmap_region perf_event_mmap perf_iterate_sb perf_iterate_ctx perf_event_mmap_output perf_output_copy memcpy_erms 42.95% 42.95% ls libpthread-2.29.so [.] __pthread_initialize_minimal_internal | ---_init __pthread_initialize_minimal_internal 42.95% 0.00% ls libpthread-2.29.so [.] _init | ---_init __pthread_initialize_minimal_internal # # (Tip: Profiling branch (mis)predictions with: perf record -b / perf report) # # The branch stack view be explicitely selected using: # perf report -h branch-stack Usage: perf report [] -b, --branch-stack use branch records for per branch histogram filling # I.e. after this patch: # perf report -b --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 13 of event 'cycles' # Event count (approx.): 13 # # Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles # ........ ....... .................... ........................... ......................................... .................. # 7.69% ls libpthread-2.29.so [.] _init [.] __pthread_initialize_minimal_internal 6827 7.69% ls ld-2.29.so [k] _start [k] _dl_start - 7.69% ls ld-2.29.so [.] _dl_start_user [.] _dl_init -24790 7.69% ls ld-2.29.so [k] _dl_start [k] _dl_sysdep_start 278 7.69% ls ld-2.29.so [k] dl_main [k] _dl_map_object_deps 15581 7.69% ls ld-2.29.so [k] open_verify.constprop.0 [k] lseek64 4228 7.69% ls ld-2.29.so [k] _dl_map_object [k] open_verify.constprop.0 55 7.69% ls ld-2.29.so [k] openaux [k] _dl_map_object 67 7.69% ls ld-2.29.so [k] _dl_map_object_deps [k] 0x00007f441b57c090 112 7.69% ls ld-2.29.so [.] call_init.part.0 [.] _init 334 7.69% ls ld-2.29.so [.] _dl_init [.] call_init.part.0 383 7.69% ls ld-2.29.so [k] _dl_sysdep_start [k] dl_main 45 7.69% ls ld-2.29.so [k] _dl_catch_exception [k] openaux 116 # # (Tip: Show current config key-value pairs: perf config --list) # # Signed-off-by: Alexey Budankov Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/ccbd9583-82f4-dec5-7e84-64bf56e351fb@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5e003d02821e..79dfb1139f94 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1281,6 +1281,8 @@ int cmd_report(int argc, const char **argv) has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); + if (perf_evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER) + has_br_stack = false; setup_forced_leader(&report, session->evlist); From a4973d8f7bea98a0795ba853e7bd2cef11363824 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 15 Aug 2019 16:28:54 +0800 Subject: [PATCH 11/17] perf cs-etm: Support sample flags 'insn' and 'insnlen' The synthetic branch and instruction samples are missed to set instruction related info, thus the perf tool fails to display samples with flags '-F,+insn,+insnlen'. The CoreSight trace decoder provides sufficient information to decide the instruction size based on the ISA type: A64/A32 instructions are 32-bit size, but one exception is the T32 instruction size, which might be 32-bit or 16-bit. This patch handles these cases and it reads the instruction values from DSO file; thus can support the flags '-F,+insn,+insnlen'. Before: # perf script -F,insn,insnlen,ip,sym 0 [unknown] ilen: 0 ffff97174044 _start ilen: 0 ffff97174938 _dl_start ilen: 0 ffff97174938 _dl_start ilen: 0 ffff97174938 _dl_start ilen: 0 ffff97174938 _dl_start ilen: 0 ffff97174938 _dl_start ilen: 0 ffff97174938 _dl_start ilen: 0 ffff97174938 _dl_start ilen: 0 ffff97174938 _dl_start ilen: 0 [...] After: # perf script -F,insn,insnlen,ip,sym 0 [unknown] ilen: 0 ffff97174044 _start ilen: 4 insn: 2f 02 00 94 ffff97174938 _dl_start ilen: 4 insn: c1 ff ff 54 ffff97174938 _dl_start ilen: 4 insn: c1 ff ff 54 ffff97174938 _dl_start ilen: 4 insn: c1 ff ff 54 ffff97174938 _dl_start ilen: 4 insn: c1 ff ff 54 ffff97174938 _dl_start ilen: 4 insn: c1 ff ff 54 ffff97174938 _dl_start ilen: 4 insn: c1 ff ff 54 ffff97174938 _dl_start ilen: 4 insn: c1 ff ff 54 ffff97174938 _dl_start ilen: 4 insn: c1 ff ff 54 [...] Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Tested-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mike Leach Cc: Namhyung Kim Cc: Robert Walker Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190815082854.18191-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index ed6f7fd5b90b..b3a5daaf1a8f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1076,6 +1076,35 @@ bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq) return !!etmq->etm->timeless_decoding; } +static void cs_etm__copy_insn(struct cs_etm_queue *etmq, + u64 trace_chan_id, + const struct cs_etm_packet *packet, + struct perf_sample *sample) +{ + /* + * It's pointless to read instructions for the CS_ETM_DISCONTINUITY + * packet, so directly bail out with 'insn_len' = 0. + */ + if (packet->sample_type == CS_ETM_DISCONTINUITY) { + sample->insn_len = 0; + return; + } + + /* + * T32 instruction size might be 32-bit or 16-bit, decide by calling + * cs_etm__t32_instr_size(). + */ + if (packet->isa == CS_ETM_ISA_T32) + sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id, + sample->ip); + /* Otherwise, A64 and A32 instruction size are always 32-bit. */ + else + sample->insn_len = 4; + + cs_etm__mem_access(etmq, trace_chan_id, sample->ip, + sample->insn_len, (void *)sample->insn); +} + static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq, u64 addr, u64 period) @@ -1097,9 +1126,10 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, sample.period = period; sample.cpu = tidq->packet->cpu; sample.flags = tidq->prev_packet->flags; - sample.insn_len = 1; sample.cpumode = event->sample.header.misc; + cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); + if (etm->synth_opts.last_branch) { cs_etm__copy_last_branch_rb(etmq, tidq); sample.branch_stack = tidq->last_branch; @@ -1159,6 +1189,9 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, sample.flags = tidq->prev_packet->flags; sample.cpumode = event->sample.header.misc; + cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet, + &sample); + /* * perf report cannot handle events without a branch stack */ From 9e79ff77e4195e40e7a47a2001132db66e25d541 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Aug 2019 16:09:50 -0300 Subject: [PATCH 12/17] perf ui: Make 'exit_msg' optional in ui__question_window() We will not need it when refactoring this function to be non-interactive, so make it optional. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-pnx1dn17bsz7lqt9ty95nnjx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/tui/util.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c index fe5e571816fc..5d65ea8b6496 100644 --- a/tools/perf/ui/tui/util.c +++ b/tools/perf/ui/tui/util.c @@ -188,7 +188,9 @@ int ui__question_window(const char *title, const char *text, pthread_mutex_lock(&ui__lock); max_len += 2; - nr_lines += 4; + nr_lines += 2; + if (exit_msg) + nr_lines += 2; y = SLtt_Screen_Rows / 2 - nr_lines / 2, x = SLtt_Screen_Cols / 2 - max_len / 2; @@ -199,14 +201,20 @@ int ui__question_window(const char *title, const char *text, SLsmg_write_string((char *)title); } SLsmg_gotorc(++y, x); - nr_lines -= 2; + if (exit_msg) + nr_lines -= 2; max_len -= 2; SLsmg_write_wrapped_string((unsigned char *)text, y, x, nr_lines, max_len, 1); SLsmg_gotorc(y + nr_lines - 2, x); SLsmg_write_nstring((char *)" ", max_len); SLsmg_gotorc(y + nr_lines - 1, x); - SLsmg_write_nstring((char *)exit_msg, max_len); + if (exit_msg) { + SLsmg_gotorc(y + nr_lines - 2, x); + SLsmg_write_nstring((char *)" ", max_len); + SLsmg_gotorc(y + nr_lines - 1, x); + SLsmg_write_nstring((char *)exit_msg, max_len); + } SLsmg_refresh(); pthread_mutex_unlock(&ui__lock); From 9b01611934c045ac7ca71aebf5ee1906951d6bce Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Aug 2019 16:38:24 -0300 Subject: [PATCH 13/17] perf ui: Introduce non-interactive ui__info_window() function Sometimes we want just to print a message on the center of the screen, like in 'perf top' while we wait for the minimum amount of samples to be collected before sorting and showing them. Also expose __ui__info_window() as an optimization for cases where such message is to be printed while holding the ui lock. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-uat0f89vfwl2w52kv9wzwd8a@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/tui/util.c | 23 +++++++++++++++-------- tools/perf/ui/util.h | 2 ++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c index 5d65ea8b6496..1163df8b6f06 100644 --- a/tools/perf/ui/tui/util.c +++ b/tools/perf/ui/tui/util.c @@ -162,8 +162,7 @@ int ui_browser__input_window(const char *title, const char *text, char *input, return key; } -int ui__question_window(const char *title, const char *text, - const char *exit_msg, int delay_secs) +void __ui__info_window(const char *title, const char *text, const char *exit_msg) { int x, y; int max_len = 0, nr_lines = 0; @@ -185,8 +184,6 @@ int ui__question_window(const char *title, const char *text, t = sep + 1; } - pthread_mutex_lock(&ui__lock); - max_len += 2; nr_lines += 2; if (exit_msg) @@ -206,19 +203,29 @@ int ui__question_window(const char *title, const char *text, max_len -= 2; SLsmg_write_wrapped_string((unsigned char *)text, y, x, nr_lines, max_len, 1); - SLsmg_gotorc(y + nr_lines - 2, x); - SLsmg_write_nstring((char *)" ", max_len); - SLsmg_gotorc(y + nr_lines - 1, x); if (exit_msg) { SLsmg_gotorc(y + nr_lines - 2, x); SLsmg_write_nstring((char *)" ", max_len); SLsmg_gotorc(y + nr_lines - 1, x); SLsmg_write_nstring((char *)exit_msg, max_len); } +} + +void ui__info_window(const char *title, const char *text) +{ + pthread_mutex_lock(&ui__lock); + __ui__info_window(title, text, NULL); SLsmg_refresh(); - pthread_mutex_unlock(&ui__lock); +} +int ui__question_window(const char *title, const char *text, + const char *exit_msg, int delay_secs) +{ + pthread_mutex_lock(&ui__lock); + __ui__info_window(title, text, exit_msg); + SLsmg_refresh(); + pthread_mutex_unlock(&ui__lock); return ui__getch(delay_secs); } diff --git a/tools/perf/ui/util.h b/tools/perf/ui/util.h index 5e44223b56fa..40891942f465 100644 --- a/tools/perf/ui/util.h +++ b/tools/perf/ui/util.h @@ -8,6 +8,8 @@ int ui__getch(int delay_secs); int ui__popup_menu(int argc, char * const argv[]); int ui__help_window(const char *text); int ui__dialog_yesno(const char *msg); +void __ui__info_window(const char *title, const char *text, const char *exit_msg); +void ui__info_window(const char *title, const char *text); int ui__question_window(const char *title, const char *text, const char *exit_msg, int delay_secs); From 2284cf8074ff12b6304665618f81ced3aa0b41de Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Aug 2019 16:43:58 -0300 Subject: [PATCH 14/17] perf ui browser: Allow specifying message to show when no samples are available to display The 'perf top' tool will use that to avoid having a initial blank screen while collecting the minimum number of samples to sort and display. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-89ciceg8cy4442he3t0jzo3f@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browser.c | 2 ++ tools/perf/ui/browser.h | 1 + 2 files changed, 3 insertions(+) diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index d227d74b28f8..c797a853d3a0 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -347,6 +347,8 @@ static int __ui_browser__refresh(struct ui_browser *browser) SLsmg_fill_region(browser->y + row + browser->extra_title_lines, browser->x, browser->rows - row, width, ' '); + if (browser->nr_entries == 0 && browser->no_samples_msg) + __ui__info_window(NULL, browser->no_samples_msg, NULL); return 0; } diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index dc1444136658..3678eb88f119 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -23,6 +23,7 @@ struct ui_browser { void *priv; const char *title; char *helpline; + const char *no_samples_msg; void (*refresh_dimensions)(struct ui_browser *browser); unsigned int (*refresh)(struct ui_browser *browser); void (*write)(struct ui_browser *browser, void *entry, int row); From 5c959b6d8f9369e42a3cb0423bcce9ac069cee55 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Aug 2019 16:48:25 -0300 Subject: [PATCH 15/17] perf top: Show info message while collecting samples Give visual cue about what is happening while initially collecting the minimal set of samples to collect/sort/display. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-xcui60p1v6ozijfam2o89ya8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index b195b1ba625b..30547fdb0787 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2894,6 +2894,9 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, if (symbol_conf.col_width_list_str) perf_hpp__set_user_width(symbol_conf.col_width_list_str); + if (!is_report_browser(hbt)) + browser->b.no_samples_msg = "Collecting samples..."; + while (1) { struct thread *thread = NULL; struct map *map = NULL; From 42fc2e9ef9603a7948aaa4ffd8dfb94b30294ad8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Aug 2019 11:45:17 -0300 Subject: [PATCH 16/17] tools headers: Fixup bitsperlong per arch includes We were getting the file by luck, from one of the paths in -I, fix it to get it from the proper place: $ cd tools/include/uapi/asm/ [acme@quaco asm]$ grep include bitsperlong.h #include "../../arch/x86/include/uapi/asm/bitsperlong.h" #include "../../arch/arm64/include/uapi/asm/bitsperlong.h" #include "../../arch/powerpc/include/uapi/asm/bitsperlong.h" #include "../../arch/s390/include/uapi/asm/bitsperlong.h" #include "../../arch/sparc/include/uapi/asm/bitsperlong.h" #include "../../arch/mips/include/uapi/asm/bitsperlong.h" #include "../../arch/ia64/include/uapi/asm/bitsperlong.h" #include "../../arch/riscv/include/uapi/asm/bitsperlong.h" #include "../../arch/alpha/include/uapi/asm/bitsperlong.h" #include $ ls -la ../../arch/x86/include/uapi/asm/bitsperlong.h ls: cannot access '../../arch/x86/include/uapi/asm/bitsperlong.h': No such file or directory $ ls -la ../../../arch/*/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 237 ../../../arch/alpha/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 841 ../../../arch/arm64/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 966 ../../../arch/hexagon/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 234 ../../../arch/ia64/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 100 ../../../arch/microblaze/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 244 ../../../arch/mips/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 352 ../../../arch/parisc/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 312 ../../../arch/powerpc/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 353 ../../../arch/riscv/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 292 ../../../arch/s390/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 323 ../../../arch/sparc/include/uapi/asm/bitsperlong.h -rw-rw-r--. 1 320 ../../../arch/x86/include/uapi/asm/bitsperlong.h $ Found while fixing some other problem, before it was escaping the tools/ chroot and using stuff in the kernel sources: CC /tmp/build/perf/util/find_bit.o In file included from /git/linux/tools/include/../../arch/x86/include/uapi/asm/bitsperlong.h:11, from /git/linux/tools/include/uapi/asm/bitsperlong.h:3, from /git/linux/tools/include/linux/bits.h:6, from /git/linux/tools/include/linux/bitops.h:13, from ../lib/find_bit.c:17: # cd /git/linux/tools/include/../../arch/x86/include/uapi/asm/ # pwd /git/linux/arch/x86/include/uapi/asm # Now it is getting the one we want it to, i.e. the one inside tools/: CC /tmp/build/perf/util/find_bit.o In file included from /git/linux/tools/arch/x86/include/uapi/asm/bitsperlong.h:11, from /git/linux/tools/include/linux/bits.h:6, from /git/linux/tools/include/linux/bitops.h:13, Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-8f8cfqywmf6jk8a3ucr0ixhu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm/bitsperlong.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h index 57aaeaf8e192..edba4d93e9e6 100644 --- a/tools/include/uapi/asm/bitsperlong.h +++ b/tools/include/uapi/asm/bitsperlong.h @@ -1,22 +1,22 @@ /* SPDX-License-Identifier: GPL-2.0 */ #if defined(__i386__) || defined(__x86_64__) -#include "../../arch/x86/include/uapi/asm/bitsperlong.h" +#include "../../../arch/x86/include/uapi/asm/bitsperlong.h" #elif defined(__aarch64__) -#include "../../arch/arm64/include/uapi/asm/bitsperlong.h" +#include "../../../arch/arm64/include/uapi/asm/bitsperlong.h" #elif defined(__powerpc__) -#include "../../arch/powerpc/include/uapi/asm/bitsperlong.h" +#include "../../../arch/powerpc/include/uapi/asm/bitsperlong.h" #elif defined(__s390__) -#include "../../arch/s390/include/uapi/asm/bitsperlong.h" +#include "../../../arch/s390/include/uapi/asm/bitsperlong.h" #elif defined(__sparc__) -#include "../../arch/sparc/include/uapi/asm/bitsperlong.h" +#include "../../../arch/sparc/include/uapi/asm/bitsperlong.h" #elif defined(__mips__) -#include "../../arch/mips/include/uapi/asm/bitsperlong.h" +#include "../../../arch/mips/include/uapi/asm/bitsperlong.h" #elif defined(__ia64__) -#include "../../arch/ia64/include/uapi/asm/bitsperlong.h" +#include "../../../arch/ia64/include/uapi/asm/bitsperlong.h" #elif defined(__riscv) -#include "../../arch/riscv/include/uapi/asm/bitsperlong.h" +#include "../../../arch/riscv/include/uapi/asm/bitsperlong.h" #elif defined(__alpha__) -#include "../../arch/alpha/include/uapi/asm/bitsperlong.h" +#include "../../../arch/alpha/include/uapi/asm/bitsperlong.h" #else #include #endif From b81d39c7a1efb83caa3f4419939a46e96191abb6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 20 Aug 2019 14:46:24 +0200 Subject: [PATCH 17/17] libperf: Fix arch include paths Guenter Roeck reported problem with compilation when the ARCH is specified: $ make ARCH=x86_64 In file included from tools/include/asm/atomic.h:6:0, from include/linux/atomic.h:5, from tools/include/linux/refcount.h:41, from cpumap.c:4: tools/include/asm/../../arch/x86/include/asm/atomic.h:11:10: fatal error: asm/cmpxchg.h: No such file or directory The problem is that we don't use SRCARCH (the sanitized ARCH version) and we don't get the proper include path. Reported-by: Guenter Roeck Signed-off-by: Jiri Olsa Tested-by: Guenter Roeck Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 314350491810 ("libperf: Make libperf.a part of the perf build") Link: http://lkml.kernel.org/r/20190820124624.GG24105@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/lib/Makefile b/tools/perf/lib/Makefile index 8a9ae50818e4..a67efb8d9d39 100644 --- a/tools/perf/lib/Makefile +++ b/tools/perf/lib/Makefile @@ -59,7 +59,7 @@ else CFLAGS := -g -Wall endif -INCLUDES = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/ -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +INCLUDES = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/ -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi # Append required CFLAGS override CFLAGS += $(EXTRA_WARNINGS)