From 9dc9a95f03a69ab926d9ff1986ab2087f34a5dce Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Tue, 3 Apr 2018 21:18:33 +0300 Subject: [PATCH 01/17] perf stat: Enable 1ms interval for printing event counters values Currently print count interval for performance counters values is limited by 10ms so reading the values at frequencies higher than 100Hz is restricted by the tool. This change makes perf stat -I possible on frequencies up to 1KHz and, to some extent, makes perf stat -I to be on-par with perf record sampling profiling. When running perf stat -I for monitoring e.g. PCIe uncore counters and at the same time profiling some I/O workload by perf record e.g. for cpu-cycles and context switches, it is then possible to observe consolidated CPU/OS/IO(Uncore) performance picture for that workload. Tool overhead warning printed when specifying -v option can be missed due to screen scrolling in case you have output to the console so message is moved into help available by running perf stat -h. Signed-off-by: Alexey Budankov Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/b842ad6a-d606-32e4-afe5-974071b5198e@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 2 +- tools/perf/builtin-stat.c | 14 ++------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index f15b306be183..e6c3b4e555c2 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -153,7 +153,7 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m -I msecs:: --interval-print msecs:: -Print count deltas every N milliseconds (minimum: 10ms) +Print count deltas every N milliseconds (minimum: 1ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. example: 'perf stat -I 1000 -e cycles -a sleep 5' diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f5c454855908..147a27e8c937 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1943,7 +1943,8 @@ static const struct option stat_options[] = { OPT_STRING(0, "post", &post_cmd, "command", "command to run after to the measured command"), OPT_UINTEGER('I', "interval-print", &stat_config.interval, - "print counts at regular interval in ms (>= 10)"), + "print counts at regular interval in ms " + "(overhead is possible for values <= 100ms)"), OPT_INTEGER(0, "interval-count", &stat_config.times, "print counts for fixed number of times"), OPT_UINTEGER(0, "timeout", &stat_config.timeout, @@ -2923,17 +2924,6 @@ int cmd_stat(int argc, const char **argv) } } - if (interval && interval < 100) { - if (interval < 10) { - pr_err("print interval must be >= 10ms\n"); - parse_options_usage(stat_usage, stat_options, "I", 1); - goto out; - } else - pr_warning("print interval < 100ms. " - "The overhead percentage could be high in some cases. " - "Please proceed with caution.\n"); - } - if (stat_config.times && interval) interval_count = true; else if (stat_config.times && !interval) { From 4d3b57da1593c66835d8e3a757e4751b35493fb8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 4 Apr 2018 17:34:45 +0100 Subject: [PATCH 02/17] tools headers: Restore READ_ONCE() C++ compatibility Our userspace defines READ_ONCE() in a way that clang doesn't like, as we have an anonymous union in which neither field is initialized. WRITE_ONCE() is fine since it initializes the __val field. For READ_ONCE() we can keep clang and GCC happy with a dummy initialization of the __c field, so let's do that. At the same time, let's split READ_ONCE() and WRITE_ONCE() over several lines for legibility, as we do in the in-kernel . Reported-by: Li Zhijian Reported-by: Sandipan Das Tested-by: Sandipan Das Signed-off-by: Mark Rutland Fixes: 6aa7de059173a986 ("locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns to READ_ONCE()/WRITE_ONCE()") Link: http://lkml.kernel.org/r/20180404163445.16492-1-mark.rutland@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 04e32f965ad7..1827c2f973f9 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -151,11 +151,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * required ordering. */ -#define READ_ONCE(x) \ - ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__c = { 0 } }; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) -#define WRITE_ONCE(x, val) \ - ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) #ifndef __fallthrough From af72cfb80af5e4cafd8e0b58ac54f222c913aa1b Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 10 Apr 2018 19:16:24 -0500 Subject: [PATCH 03/17] perf tests: Run dwarf unwind test on arm32 Enable the unwind test on arm32: $ perf test unwind 58: DWARF unwind : Ok Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Brian Robbins Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180410191624.a3a468670dd4548c66d3d094@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/include/arch-tests.h | 12 ++++++++++++ tools/perf/arch/arm/tests/Build | 2 ++ tools/perf/arch/arm/tests/arch-tests.c | 16 ++++++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 tools/perf/arch/arm/include/arch-tests.h create mode 100644 tools/perf/arch/arm/tests/arch-tests.c diff --git a/tools/perf/arch/arm/include/arch-tests.h b/tools/perf/arch/arm/include/arch-tests.h new file mode 100644 index 000000000000..90ec4c8cb880 --- /dev/null +++ b/tools/perf/arch/arm/include/arch-tests.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_TESTS_H +#define ARCH_TESTS_H + +#ifdef HAVE_DWARF_UNWIND_SUPPORT +struct thread; +struct perf_sample; +#endif + +extern struct test arch_tests[]; + +#endif diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build index b30eff9bcc83..883c57ff0c08 100644 --- a/tools/perf/arch/arm/tests/Build +++ b/tools/perf/arch/arm/tests/Build @@ -1,2 +1,4 @@ libperf-y += regs_load.o libperf-y += dwarf-unwind.o + +libperf-y += arch-tests.o diff --git a/tools/perf/arch/arm/tests/arch-tests.c b/tools/perf/arch/arm/tests/arch-tests.c new file mode 100644 index 000000000000..5b1543c98022 --- /dev/null +++ b/tools/perf/arch/arm/tests/arch-tests.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "tests/tests.h" +#include "arch-tests.h" + +struct test arch_tests[] = { +#ifdef HAVE_DWARF_UNWIND_SUPPORT + { + .desc = "DWARF unwind", + .func = test__dwarf_unwind, + }, +#endif + { + .func = NULL, + }, +}; From 592c10e217f3edb35c7e0deba161fef69ad1a336 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Apr 2018 10:30:03 -0300 Subject: [PATCH 04/17] perf annotate: Allow showing offsets in more than just jump targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jesper wanted to see offsets at callq sites when doing some performance investigation related to retpolines, so save him some time by providing an 'struct annotation_options' to control where offsets should appear: just on jump targets? That + call instructions? All? This puts in place the logic to show the offsets, now we need to wire this up in the TUI browser (next patch) and on the 'perf annotate --stdio2" interface, where we need a more general mechanism to setup the 'annotation_options' struct from the command line. Suggested-by: Jesper Dangaard Brouer Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Linus Torvalds Cc: Martin Liška Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-m3jc9c3swobye9tj08gnh5i7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 11 +++++++++-- tools/perf/util/annotate.h | 9 +++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index fbad8dfbb186..5edc565d86c4 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -46,6 +46,7 @@ struct annotation_options annotation__default_options = { .use_offset = true, .jump_arrows = true, + .offset_level = ANNOTATION__OFFSET_JUMP_TARGETS, }; const char *disassembler_style; @@ -2512,7 +2513,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati if (!notes->options->use_offset) { printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { - if (al->jump_sources) { + if (al->jump_sources && + notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { if (notes->options->show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", @@ -2523,9 +2525,14 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati obj__printf(obj, bf); obj__set_color(obj, prev); } - +print_addr: printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", notes->widths.target, addr); + } else if (ins__is_call(&disasm_line(al)->ins) && + notes->options->offset_level >= ANNOTATION__OFFSET_CALL) { + goto print_addr; + } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { + goto print_addr; } else { printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " "); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index db8d09bea07e..f28a9e43421d 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -70,8 +70,17 @@ struct annotation_options { show_nr_jumps, show_nr_samples, show_total_period; + u8 offset_level; }; +enum { + ANNOTATION__OFFSET_JUMP_TARGETS = 1, + ANNOTATION__OFFSET_CALL, + ANNOTATION__MAX_OFFSET_LEVEL, +}; + +#define ANNOTATION__MIN_OFFSET_LEVEL ANNOTATION__OFFSET_JUMP_TARGETS + extern struct annotation_options annotation__default_options; struct annotation; From 51f39603b5f260c73635f4d06d390476b32df6a5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Apr 2018 10:41:23 -0300 Subject: [PATCH 05/17] perf annotate browser: Allow showing offsets in more than just jump targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jesper wanted to see offsets at callq sites when doing some performance investigation related to retpolines, so save him some time by providing a 'O' hotkey to allow showing offsets from function start at call instructions or in all instructions, just go on pressing 'O' till the offsets you need appear. Example: Starts with: Samples: 64 of event 'cycles:ppp', 100000 Hz, Event count (approx.): 318963 ixgbe_read_reg /proc/kcore Percent│ ↑ je 2a │ ┌──cmp $0xffffffff,%r13d │ ├──je d0 │ │ mov $0x53e3,%edi │ │→ callq __const_udelay │ │ sub $0x1,%r15d │ │↑ jne 83 │ │ mov 0x8(%rbp),%rax │ │ testb $0x20,0x1799(%rax) │ │↑ je 2a │ │ mov 0x200(%rax),%rdi │ │ mov %r13d,%edx │ │ mov $0xffffffffc02595d8,%rsi │ │→ callq netdev_warn │ │↑ jmpq 2a │d0:└─→mov 0x8(%rbp),%rsi │ mov %rbp,%rdi │ mov %eax,0x4(%rsp) │ → callq ixgbe_remove_adapter.isra.77 │ mov 0x4(%rsp),%eax Press 'h' for help on key bindings ============================================================================ Pess 'O': Samples: 64 of event 'cycles:ppp', 100000 Hz, Event count (approx.): 318963 ixgbe_read_reg /proc/kcore Percent│ ↑ je 2a │ ┌──cmp $0xffffffff,%r13d │ ├──je d0 │ │ mov $0x53e3,%edi │99:│→ callq __const_udelay │ │ sub $0x1,%r15d │ │↑ jne 83 │ │ mov 0x8(%rbp),%rax │ │ testb $0x20,0x1799(%rax) │ │↑ je 2a │ │ mov 0x200(%rax),%rdi │ │ mov %r13d,%edx │ │ mov $0xffffffffc02595d8,%rsi │c6:│→ callq netdev_warn │ │↑ jmpq 2a │d0:└─→mov 0x8(%rbp),%rsi │ mov %rbp,%rdi │ mov %eax,0x4(%rsp) │db: → callq ixgbe_remove_adapter.isra.77 │ mov 0x4(%rsp),%eax Press 'h' for help on key bindings ============================================================================ Press 'O' again: Samples: 64 of event 'cycles:ppp', 100000 Hz, Event count (approx.): 318963 ixgbe_read_reg /proc/kcore Percent│8c: ↑ je 2a │8e:┌──cmp $0xffffffff,%r13d │92:├──je d0 │94:│ mov $0x53e3,%edi │99:│→ callq __const_udelay │9e:│ sub $0x1,%r15d │a2:│↑ jne 83 │a4:│ mov 0x8(%rbp),%rax │a8:│ testb $0x20,0x1799(%rax) │af:│↑ je 2a │b5:│ mov 0x200(%rax),%rdi │bc:│ mov %r13d,%edx │bf:│ mov $0xffffffffc02595d8,%rsi │c6:│→ callq netdev_warn │cb:│↑ jmpq 2a │d0:└─→mov 0x8(%rbp),%rsi │d4: mov %rbp,%rdi │d7: mov %eax,0x4(%rsp) │db: → callq ixgbe_remove_adapter.isra.77 │e0: mov 0x4(%rsp),%eax Press 'h' for help on key bindings ============================================================================ Press 'O' again and it will show just jump target offsets. Suggested-by: Jesper Dangaard Brouer Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Linus Torvalds Cc: Martin Liška Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-upp6pfdetwlsx18ec2uf1od4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 12c099a87f8b..3781d74088a7 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -692,6 +692,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "J Toggle showing number of jump sources on targets\n" "n Search next string\n" "o Toggle disassembler output/simplified view\n" + "O Bump offset level (jump targets -> +call -> all -> cycle thru)\n" "s Toggle source code view\n" "t Circulate percent, total period, samples view\n" "/ Search string\n" @@ -719,6 +720,10 @@ static int annotate_browser__run(struct annotate_browser *browser, notes->options->use_offset = !notes->options->use_offset; annotation__update_column_widths(notes); continue; + case 'O': + if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL) + notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + continue; case 'j': notes->options->jump_arrows = !notes->options->jump_arrows; continue; From e14b733c5dc62f574b4dbc045b2cc52b03d83d4c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Apr 2018 12:08:53 -0300 Subject: [PATCH 06/17] perf jvmti: Give hints about package names needed to build Give as examples of package names to install to have this built for fedora and debian, to help the user a bit. The part from 'e.g.:' onwards: No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Cc: William Cohen Link: https://lkml.kernel.org/n/tip-edbi4r2pvzn7no6ebxbtczng@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c7abd83a8e19..6b307e97dc57 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -847,7 +847,7 @@ ifndef NO_JVMTI ifeq ($(feature-jvmti), 1) $(call detected_var,JDIR) else - $(warning No openjdk development package found, please install JDK package) + $(warning No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel) NO_JVMTI := 1 endif endif From c13009c1ef3a94cfea212c86bbb94c8361e5de0c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Apr 2018 17:57:32 -0300 Subject: [PATCH 07/17] perf tests bpf: Remove unused ptrace.h include from LLVM test The bpf-script-test-kbuild.c script, used in one of the LLVM subtests, includes ptrace.h unnecessarily, and that ends up making it include a header that uses asm(_ASM_SP), a feature that is not supported by clang <= 4.0, breaking that 'perf test' entry. This ended up leading to the ca26cffa4e4a ("x86/asm: Allow again using asm.h when building for the 'bpf' clang target"), adding an ifndef __BPF__ to the arch/x86/include/asm/asm.h file. Newer clang versions accept that asm(_ASM_SP) construct, so just remove the ptrace.h include, which paves the way for reverting ca26cffa4e4a ("x86/asm: Allow again using asm.h when building for the 'bpf' clang target"). Suggested-by: Yonghong Song Acked-by: Yonghong Song Link: https://lkml.kernel.org/r/613f0a0d-c433-8f4d-dcc1-c9889deae39e@fb.com Cc: Adrian Hunter Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Daniel Borkmann Cc: David Ahern Cc: Dmitriy Vyukov Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Matthias Kaehlcke Cc: Miguel Bernal Marin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-clbcnzbakdp18ibme4wt43ib@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf-script-test-kbuild.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c index 3626924740d8..ff3ec8337f0a 100644 --- a/tools/perf/tests/bpf-script-test-kbuild.c +++ b/tools/perf/tests/bpf-script-test-kbuild.c @@ -9,7 +9,6 @@ #define SEC(NAME) __attribute__((section(NAME), used)) #include -#include SEC("func=vfs_llseek") int bpf_func__vfs_llseek(void *ctx) From fd97d39b0aa49a4beb429aec344604c1b689f089 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Apr 2018 18:03:33 -0300 Subject: [PATCH 08/17] Revert "x86/asm: Allow again using asm.h when building for the 'bpf' clang target" This reverts commit ca26cffa4e4aaeb09bb9e308f95c7835cb149248. Newer clang versions accept that asm(_ASM_SP) construct, and now that the bpf-script-test-kbuild.c script, used in one of the 'perf test LLVM' subtests doesn't include ptrace.h, which ended up including arch/x86/include/asm/asm.h, we can revert this patch. Suggested-by: Yonghong Song Link: https://lkml.kernel.org/r/613f0a0d-c433-8f4d-dcc1-c9889deae39e@fb.com Acked-by: Yonghong Song Cc: Adrian Hunter Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Daniel Borkmann Cc: David Ahern Cc: Dmitriy Vyukov Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Matthias Kaehlcke Cc: Miguel Bernal Marin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-nqozcv8loq40tkqpfw997993@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/include/asm/asm.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 386a6900e206..219faaec51df 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -136,7 +136,6 @@ #endif #ifndef __ASSEMBLY__ -#ifndef __BPF__ /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -146,6 +145,5 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif -#endif #endif /* _ASM_X86_ASM_H */ From 90ce61b91903f9c357cbceced45d41642f2aa812 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 9 Apr 2018 18:26:47 +0800 Subject: [PATCH 09/17] perf script: Use HAVE_LIBXXX_SUPPORT to replace NO_LIBXXX In Makefile.config, we define the conditional compilation variables HAVE_LIBPERL_SUPPORT and HAVE_LIBPYTHON_SUPPORT. To make the C code more consistent, this patch replaces NO_LIBPERL/NO_LIBPYTHON in C code with HAVE_LIBPERL_SUPPORT/ HAVE_LIBPYTHON_SUPPORT. Signed-off-by: Jin Yao Suggested-by: Ingo Molnar Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1523269609-28824-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 ++-- tools/perf/util/trace-event-scripting.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 313c42423393..b17edbcd98cc 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2801,11 +2801,11 @@ int find_scripts(char **scripts_array, char **scripts_path_array) for_each_lang(scripts_path, scripts_dir, lang_dirent) { scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, lang_dirent->d_name); -#ifdef NO_LIBPERL +#ifndef HAVE_LIBPERL_SUPPORT if (strstr(lang_path, "perl")) continue; #endif -#ifdef NO_LIBPYTHON +#ifndef HAVE_LIBPYTHON_SUPPORT if (strstr(lang_path, "python")) continue; #endif diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 0ac9077f62a2..b1e5c3a2b8e3 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -98,7 +98,7 @@ static void register_python_scripting(struct scripting_ops *scripting_ops) } } -#ifdef NO_LIBPYTHON +#ifndef HAVE_LIBPYTHON_SUPPORT void setup_python_scripting(void) { register_python_scripting(&python_scripting_unsupported_ops); @@ -161,7 +161,7 @@ static void register_perl_scripting(struct scripting_ops *scripting_ops) } } -#ifdef NO_LIBPERL +#ifndef HAVE_LIBPERL_SUPPORT void setup_perl_scripting(void) { register_perl_scripting(&perl_scripting_unsupported_ops); From 22e9af4e94801bbdf6945e55db64b877be7c71b3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 9 Apr 2018 18:26:48 +0800 Subject: [PATCH 10/17] perf tools: Rename HAVE_SYSCALL_TABLE to HAVE_SYSCALL_TABLE_SUPPORT To be consistent with other HAVE_XXX_SUPPORT uses in Makefile.config, this patch renames HAVE_SYSCALL_TABLE to HAVE_SYSCALL_TABLE_SUPPORT and updates the C code accordingly. Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1523269609-28824-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- tools/perf/builtin-help.c | 2 +- tools/perf/perf.c | 4 ++-- tools/perf/util/generate-cmdlist.sh | 2 +- tools/perf/util/syscalltbl.c | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6b307e97dc57..ae7dc46e8f8a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -68,7 +68,7 @@ ifeq ($(NO_PERF_REGS),0) endif ifneq ($(NO_SYSCALL_TABLE),1) - CFLAGS += -DHAVE_SYSCALL_TABLE + CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT endif # So far there's only x86 and arm libdw unwind support merged in perf. diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 4aca13f23b9d..1c41b4eaf73c 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -439,7 +439,7 @@ int cmd_help(int argc, const char **argv) #ifdef HAVE_LIBELF_SUPPORT "probe", #endif -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) "trace", #endif NULL }; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 1659029d03fc..20a08cb32332 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -73,7 +73,7 @@ static struct cmd_struct commands[] = { { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) { "trace", cmd_trace, 0 }, #endif { "inject", cmd_inject, 0 }, @@ -491,7 +491,7 @@ int main(int argc, const char **argv) argv[0] = cmd; } if (strstarts(cmd, "trace")) { -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv); diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index ff17920a5ebc..c3cef36d4176 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)" +echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)" sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 895122d638dd..0ee7f568d60c 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -17,7 +17,7 @@ #include #include -#ifdef HAVE_SYSCALL_TABLE +#ifdef HAVE_SYSCALL_TABLE_SUPPORT #include #include "string2.h" #include "util.h" @@ -139,7 +139,7 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); } -#else /* HAVE_SYSCALL_TABLE */ +#else /* HAVE_SYSCALL_TABLE_SUPPORT */ #include @@ -176,4 +176,4 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g { return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); } -#endif /* HAVE_SYSCALL_TABLE */ +#endif /* HAVE_SYSCALL_TABLE_SUPPORT */ From 8a812bf552d98f6f887f860d3910f201b4a97b26 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 9 Apr 2018 18:26:49 +0800 Subject: [PATCH 11/17] perf version: Print status for syscall_table This patch doesn't print "libaudit" line if HAVE_SYSCALL_TABLE_SUPPORT is available and add a line for HAVE_SYSCALL_TABLE_SUPPORT. For example, $ ./perf -vv perf version 4.13.rc5.gc2f8af9 dwarf: [ on ] # HAVE_DWARF_SUPPORT dwarf_getlocations: [ on ] # HAVE_DWARF_GETLOCATIONS_SUPPORT glibc: [ on ] # HAVE_GLIBC_SUPPORT gtk2: [ on ] # HAVE_GTK2_SUPPORT syscall_table: [ on ] # HAVE_SYSCALL_TABLE_SUPPORT libbfd: [ on ] # HAVE_LIBBFD_SUPPORT libelf: [ on ] # HAVE_LIBELF_SUPPORT libnuma: [ on ] # HAVE_LIBNUMA_SUPPORT numa_num_possible_cpus: [ on ] # HAVE_LIBNUMA_SUPPORT libperl: [ on ] # HAVE_LIBPERL_SUPPORT libpython: [ on ] # HAVE_LIBPYTHON_SUPPORT libslang: [ on ] # HAVE_SLANG_SUPPORT libcrypto: [ on ] # HAVE_LIBCRYPTO_SUPPORT libunwind: [ on ] # HAVE_LIBUNWIND_SUPPORT libdw-dwarf-unwind: [ on ] # HAVE_DWARF_SUPPORT zlib: [ on ] # HAVE_ZLIB_SUPPORT lzma: [ on ] # HAVE_LZMA_SUPPORT get_cpuid: [ on ] # HAVE_AUXTRACE_SUPPORT bpf: [ on ] # HAVE_LIBBPF_SUPPORT The line "syscall_table: [ on ] # HAVE_SYSCALL_TABLE_SUPPORT" is new created. Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1523269609-28824-4-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-version.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index 2abe3910d6b6..50df168be326 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -60,7 +60,10 @@ static void library_status(void) STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations); STATUS(HAVE_GLIBC_SUPPORT, glibc); STATUS(HAVE_GTK2_SUPPORT, gtk2); +#ifndef HAVE_SYSCALL_TABLE_SUPPORT STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit); +#endif + STATUS(HAVE_SYSCALL_TABLE_SUPPORT, syscall_table); STATUS(HAVE_LIBBFD_SUPPORT, libbfd); STATUS(HAVE_LIBELF_SUPPORT, libelf); STATUS(HAVE_LIBNUMA_SUPPORT, libnuma); From e8103e44cebe2ef891bc3a5c6c4b74854846968b Mon Sep 17 00:00:00 2001 From: Takuya Yamamoto Date: Tue, 10 Apr 2018 23:35:39 +0900 Subject: [PATCH 12/17] perf sched: Fix documentation for timehist Fixed a incorrect option and usage to those shown by "perf sched timehist -h", i.e. the default is really --call-graph, which is equivalent to -g. Signed-off-by: Takuya Yamamoto Cc: Peter Zijlstra Link: https://lkml.kernel.org/n/tip-8fzo0dlsi1mku5aqx8brep5s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-sched.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index bb33601a823b..63f938b887dd 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -104,8 +104,8 @@ OPTIONS for 'perf sched timehist' kallsyms pathname -g:: ---no-call-graph:: - Do not display call chains if present. +--call-graph:: + Display call chains if present (default on). --max-stack:: Maximum number of functions to display in backtrace, default 5. From 4b163ca343b45855b03114ef2ab47c454989d55c Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 12 Apr 2018 21:51:40 +0530 Subject: [PATCH 13/17] perf tests: Disable breakpoint accounting test for powerpc We disable this test as instruction breakpoints (HW_BREAKPOINT_X) are not available for powerpc. Before applying patch: 21: Breakpoint accounting : --- start --- test child forked, pid 3635 failed opening event 0 failed opening event 0 watchpoints count 1, breakpoints count 0, has_ioctl 1, share 0 test child finished with -2 ---- end ---- Breakpoint accounting: Skip After applying patch: 21: Breakpoint accounting : Disabled Signed-off-by: Sandipan Das Cc: Jiri Olsa Cc: Naveen N. Rao Cc: Ravi Bangoria Link: http://lkml.kernel.org/r/20180412162140.2992-1-sandipan@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 625f5a6772af..cac8f8889bc3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -118,6 +118,7 @@ static struct test generic_tests[] = { { .desc = "Breakpoint accounting", .func = test__bp_accounting, + .is_supported = test__bp_signal_is_supported, }, { .desc = "Number of exit events of a simple workload", From 4f75f1cbf95e2f0853cd229d042b203931b899af Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 12 Apr 2018 15:32:46 +0200 Subject: [PATCH 14/17] perf record: Change warning for missing sysfs entry to debug Using perf on 4.16.0 kernel on s390 shows this warning: failed: can't open node sysfs data each time I run command perf record ... for example: [root@s35lp76 perf]# ./perf record -e rB0000 -- sleep 1 [ perf record: Woken up 1 times to write data ] failed: can't open node sysfs data [ perf record: Captured and wrote 0.001 MB perf.data (4 samples) ] [root@s35lp76 perf]# It turns out commit e2091cedd51bf ("perf tools: Add MEM_TOPOLOGY feature to perf data file") tries to open directory named /sys/devices/system/node/ which does not exist on s390. This is the call stack: __cmd_record +---> perf_session__write_header +---> perf_header__adds_write +---> do_write_feat +---> write_mem_topology +---> build_mem_topology prints warning The issue starts in do_write_feat() which unconditionally loops over all features and now includes HEADER_MEM_TOPOLOGY and calls write_mem_topology(). Function record__init_features() at the beginning of __cmd_record() sets all features and then turns off some of them. Fix this by changing the warning to a level 2 debug output statement. So it is only shown when debug level 2 or higher is set. Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180412133246.92801-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 121df1683c36..a8bff2178fbc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1320,7 +1320,8 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp) dir = opendir(path); if (!dir) { - pr_warning("failed: can't open node sysfs data\n"); + pr_debug2("%s: could't read %s, does this arch have topology information?\n", + __func__, path); return -1; } From 7b366142a50ad79e48de8e67c5b3e8cfb9fa82dd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Apr 2018 14:58:24 -0300 Subject: [PATCH 15/17] perf report: Fix switching to another perf.data file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the TUI the 's' hotkey can be used to switch to another perf.data file in the current directory, but that got broken in Fixes: b01141f4f59c ("perf annotate: Initialize the priv are in symbol__new()"), that would show this once another file was chosen: ┌─Fatal Error─────────────────────────────────────┐ │Annotation needs to be init before symbol__init()│ │ │ │ │ │Press any key... │ └─────────────────────────────────────────────────┘ Fix it by just silently bailing out if symbol__annotation_init() was already called, just like is done with symbol__init(), i.e. they are done just once at session start, not when switching to a new perf.data file. Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Martin Liška Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Cc: Wang Nan Fixes: b01141f4f59c ("perf annotate: Initialize the priv are in symbol__new()") Link: https://lkml.kernel.org/n/tip-ogppdtpzfax7y1h6gjdv5s6u@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 62b2dd2253eb..1466814ebada 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2091,16 +2091,14 @@ static bool symbol__read_kptr_restrict(void) int symbol__annotation_init(void) { + if (symbol_conf.init_annotation) + return 0; + if (symbol_conf.initialized) { pr_err("Annotation needs to be init before symbol__init()\n"); return -1; } - if (symbol_conf.init_annotation) { - pr_warning("Annotation being initialized multiple times\n"); - return 0; - } - symbol_conf.priv_size += sizeof(struct annotation); symbol_conf.init_annotation = true; return 0; From 43c4023152a9c5742948ac919e58ade127fa4e2e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Apr 2018 15:23:02 -0300 Subject: [PATCH 16/17] perf annotate: Allow setting the offset level in .perfconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default is 1 (jump_target): # perf annotate --ignore-vmlinux --stdio2 _raw_spin_lock_irqsave Samples: 3K of event 'cycles:ppp', 3000 Hz, Event count (approx.): 2766398574 _raw_spin_lock_irqsave() /proc/kcore 0.26 nop 4.61 push %rbx 19.33 pushfq 7.97 pop %rax 0.32 nop 0.06 mov %rax,%rbx 14.63 cli 0.06 nop xor %eax,%eax mov $0x1,%edx 49.94 lock cmpxchg %edx,(%rdi) 0.16 test %eax,%eax ↓ jne 2b 2.66 mov %rbx,%rax pop %rbx ← retq 2b: mov %eax,%esi → callq *ffffffffb30eaed0 mov %rbx,%rax pop %rbx ← retq # But one can ask for showing offsets for call instructions by setting this: # perf annotate --ignore-vmlinux --stdio2 _raw_spin_lock_irqsave Samples: 3K of event 'cycles:ppp', 3000 Hz, Event count (approx.): 2766398574 _raw_spin_lock_irqsave() /proc/kcore 0.26 nop 4.61 push %rbx 19.33 pushfq 7.97 pop %rax 0.32 nop 0.06 mov %rax,%rbx 14.63 cli 0.06 nop xor %eax,%eax mov $0x1,%edx 49.94 lock cmpxchg %edx,(%rdi) 0.16 test %eax,%eax ↓ jne 2b 2.66 mov %rbx,%rax pop %rbx ← retq 2b: mov %eax,%esi 2d: → callq *ffffffffb30eaed0 mov %rbx,%rax pop %rbx ← retq # Or using a big value to ask for all offsets to be shown: # cat ~/.perfconfig [annotate] offset_level = 100 hide_src_code = true # perf annotate --ignore-vmlinux --stdio2 _raw_spin_lock_irqsave Samples: 3K of event 'cycles:ppp', 3000 Hz, Event count (approx.): 2766398574 _raw_spin_lock_irqsave() /proc/kcore 0.26 0: nop 4.61 5: push %rbx 19.33 6: pushfq 7.97 7: pop %rax 0.32 8: nop 0.06 d: mov %rax,%rbx 14.63 10: cli 0.06 11: nop 17: xor %eax,%eax 19: mov $0x1,%edx 49.94 1e: lock cmpxchg %edx,(%rdi) 0.16 22: test %eax,%eax 24: ↓ jne 2b 2.66 26: mov %rbx,%rax 29: pop %rbx 2a: ← retq 2b: mov %eax,%esi 2d: → callq *ffffffffb30eaed0 32: mov %rbx,%rax 35: pop %rbx 36: ← retq # This also affects the TUI, i.e. the default 'perf annotate' and 'perf top/report' -> A hotkey -> annotate interfaces, when slang-devel is present in the build, i.e.: # perf version --build-options | grep slang libslang: [ on ] # HAVE_SLANG_SUPPORT # Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Martin Liška Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-venm6x5zrt40eu8hxdsmqxz6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 5 +++++ tools/perf/util/annotate.c | 15 ++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 5b4fff3adc4b..32f4a898e3f2 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -334,6 +334,11 @@ annotate.*:: 99.93 │ mov %eax,%eax + annotate.offset_level:: + Default is '1', meaning just jump targets will have offsets show right beside + the instruction. When set to '2' 'call' instructions will also have its offsets + shown, 3 or higher will show offsets for all instructions. + hist.*:: hist.percentage:: This option control the way to calculate overhead of filtered entries - diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5edc565d86c4..536ee148bff8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2649,10 +2649,11 @@ int __annotation__scnprintf_samples_period(struct annotation *notes, */ static struct annotation_config { const char *name; - bool *value; + void *value; } annotation__configs[] = { ANNOTATION__CFG(hide_src_code), ANNOTATION__CFG(jump_arrows), + ANNOTATION__CFG(offset_level), ANNOTATION__CFG(show_linenr), ANNOTATION__CFG(show_nr_jumps), ANNOTATION__CFG(show_nr_samples), @@ -2684,8 +2685,16 @@ static int annotation__config(const char *var, const char *value, if (cfg == NULL) pr_debug("%s variable unknown, ignoring...", var); - else - *cfg->value = perf_config_bool(name, value); + else if (strcmp(var, "annotate.offset_level") == 0) { + perf_config_int(cfg->value, name, value); + + if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL) + *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL; + else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL) + *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL; + } else { + *(bool *)cfg->value = perf_config_bool(name, value); + } return 0; } From b0d5c81e872ed21de1e56feb0fa6e4161da7be61 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Apr 2018 16:28:18 -0300 Subject: [PATCH 17/17] perf annotate: Handle variables in 'sub', 'or' and many other instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just like is done for 'mov' and others that can have as source or targets variables resolved by objdump, to make them more compact: - orb $0x4,0x224d71(%rip) # 226ca4 <_rtld_global+0xca4> + orb $0x4,_rtld_global+0xca4 Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Martin Liška Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-efex7746id4w4wa03nqxvh3m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/annotate/instructions.c | 67 ++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 5bd1ba8c0282..44f5aba78210 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -1,21 +1,43 @@ // SPDX-License-Identifier: GPL-2.0 static struct ins x86__instructions[] = { + { .name = "adc", .ops = &mov_ops, }, + { .name = "adcb", .ops = &mov_ops, }, + { .name = "adcl", .ops = &mov_ops, }, { .name = "add", .ops = &mov_ops, }, { .name = "addl", .ops = &mov_ops, }, { .name = "addq", .ops = &mov_ops, }, + { .name = "addsd", .ops = &mov_ops, }, { .name = "addw", .ops = &mov_ops, }, { .name = "and", .ops = &mov_ops, }, + { .name = "andb", .ops = &mov_ops, }, + { .name = "andl", .ops = &mov_ops, }, + { .name = "andpd", .ops = &mov_ops, }, + { .name = "andps", .ops = &mov_ops, }, + { .name = "andq", .ops = &mov_ops, }, + { .name = "andw", .ops = &mov_ops, }, + { .name = "bsr", .ops = &mov_ops, }, + { .name = "bt", .ops = &mov_ops, }, + { .name = "btr", .ops = &mov_ops, }, { .name = "bts", .ops = &mov_ops, }, + { .name = "btsq", .ops = &mov_ops, }, { .name = "call", .ops = &call_ops, }, { .name = "callq", .ops = &call_ops, }, + { .name = "cmovbe", .ops = &mov_ops, }, + { .name = "cmove", .ops = &mov_ops, }, + { .name = "cmovae", .ops = &mov_ops, }, { .name = "cmp", .ops = &mov_ops, }, { .name = "cmpb", .ops = &mov_ops, }, { .name = "cmpl", .ops = &mov_ops, }, { .name = "cmpq", .ops = &mov_ops, }, { .name = "cmpw", .ops = &mov_ops, }, { .name = "cmpxch", .ops = &mov_ops, }, + { .name = "cmpxchg", .ops = &mov_ops, }, + { .name = "cs", .ops = &mov_ops, }, { .name = "dec", .ops = &dec_ops, }, { .name = "decl", .ops = &dec_ops, }, + { .name = "divsd", .ops = &mov_ops, }, + { .name = "divss", .ops = &mov_ops, }, + { .name = "gs", .ops = &mov_ops, }, { .name = "imul", .ops = &mov_ops, }, { .name = "inc", .ops = &dec_ops, }, { .name = "incl", .ops = &dec_ops, }, @@ -57,25 +79,68 @@ static struct ins x86__instructions[] = { { .name = "lea", .ops = &mov_ops, }, { .name = "lock", .ops = &lock_ops, }, { .name = "mov", .ops = &mov_ops, }, + { .name = "movapd", .ops = &mov_ops, }, + { .name = "movaps", .ops = &mov_ops, }, { .name = "movb", .ops = &mov_ops, }, { .name = "movdqa", .ops = &mov_ops, }, + { .name = "movdqu", .ops = &mov_ops, }, { .name = "movl", .ops = &mov_ops, }, { .name = "movq", .ops = &mov_ops, }, + { .name = "movsd", .ops = &mov_ops, }, { .name = "movslq", .ops = &mov_ops, }, + { .name = "movss", .ops = &mov_ops, }, + { .name = "movupd", .ops = &mov_ops, }, + { .name = "movups", .ops = &mov_ops, }, + { .name = "movw", .ops = &mov_ops, }, { .name = "movzbl", .ops = &mov_ops, }, { .name = "movzwl", .ops = &mov_ops, }, + { .name = "mulsd", .ops = &mov_ops, }, + { .name = "mulss", .ops = &mov_ops, }, { .name = "nop", .ops = &nop_ops, }, { .name = "nopl", .ops = &nop_ops, }, { .name = "nopw", .ops = &nop_ops, }, { .name = "or", .ops = &mov_ops, }, + { .name = "orb", .ops = &mov_ops, }, { .name = "orl", .ops = &mov_ops, }, + { .name = "orps", .ops = &mov_ops, }, + { .name = "orq", .ops = &mov_ops, }, + { .name = "pand", .ops = &mov_ops, }, + { .name = "paddq", .ops = &mov_ops, }, + { .name = "pcmpeqb", .ops = &mov_ops, }, + { .name = "por", .ops = &mov_ops, }, + { .name = "rclb", .ops = &mov_ops, }, + { .name = "rcll", .ops = &mov_ops, }, + { .name = "retq", .ops = &ret_ops, }, + { .name = "sbb", .ops = &mov_ops, }, + { .name = "sbbl", .ops = &mov_ops, }, + { .name = "sete", .ops = &mov_ops, }, + { .name = "sub", .ops = &mov_ops, }, + { .name = "subl", .ops = &mov_ops, }, + { .name = "subq", .ops = &mov_ops, }, + { .name = "subsd", .ops = &mov_ops, }, + { .name = "subw", .ops = &mov_ops, }, { .name = "test", .ops = &mov_ops, }, { .name = "testb", .ops = &mov_ops, }, { .name = "testl", .ops = &mov_ops, }, + { .name = "ucomisd", .ops = &mov_ops, }, + { .name = "ucomiss", .ops = &mov_ops, }, + { .name = "vaddsd", .ops = &mov_ops, }, + { .name = "vandpd", .ops = &mov_ops, }, + { .name = "vmovdqa", .ops = &mov_ops, }, + { .name = "vmovq", .ops = &mov_ops, }, + { .name = "vmovsd", .ops = &mov_ops, }, + { .name = "vmulsd", .ops = &mov_ops, }, + { .name = "vorpd", .ops = &mov_ops, }, + { .name = "vsubsd", .ops = &mov_ops, }, + { .name = "vucomisd", .ops = &mov_ops, }, { .name = "xadd", .ops = &mov_ops, }, { .name = "xbeginl", .ops = &jump_ops, }, { .name = "xbeginq", .ops = &jump_ops, }, - { .name = "retq", .ops = &ret_ops, }, + { .name = "xchg", .ops = &mov_ops, }, + { .name = "xor", .ops = &mov_ops, }, + { .name = "xorb", .ops = &mov_ops, }, + { .name = "xorpd", .ops = &mov_ops, }, + { .name = "xorps", .ops = &mov_ops, }, }; static bool x86__ins_is_fused(struct arch *arch, const char *ins1,