linux/tools/perf/builtin-annotate.c

634 lines
14 KiB
C
Raw Normal View History

/*
* builtin-annotate.c
*
* Builtin annotate command: Analyze the perf.data input file,
* look up and read DSOs and symbol information and display
* a histogram of results, along various sorting keys.
*/
#include "builtin.h"
#include "util/util.h"
#include "util/color.h"
#include <linux/list.h>
#include "util/cache.h"
#include <linux/rbtree.h>
#include "util/symbol.h"
#include "util/string.h"
#include "perf.h"
#include "util/debug.h"
#include "util/event.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/thread.h"
#include "util/sort.h"
#include "util/hist.h"
#include "util/session.h"
static char const *input_name = "perf.data";
static int force;
static int full_paths;
static int print_line;
struct sym_hist {
u64 sum;
u64 ip[0];
};
struct sym_ext {
perf annotate: Print a sorted summary of annotated overhead lines It's can be very annoying to scroll down perf annotated output until we find relevant overhead. Using the -l option, you can now have a small summary sorted per overhead in the beginning of the output. Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overhead over 0.5% are summarized. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-13 06:11:22 +08:00
struct rb_node node;
double percent;
char *path;
};
struct sym_priv {
struct sym_hist *hist;
struct sym_ext *ext;
};
static const char *sym_hist_filter;
static int sym__alloc_hist(struct symbol *self)
{
struct sym_priv *priv = symbol__priv(self);
const int size = (sizeof(*priv->hist) +
(self->end - self->start) * sizeof(u64));
priv->hist = zalloc(size);
return priv->hist == NULL ? -1 : 0;
}
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
/*
* collect histogram counts
*/
static int annotate__hist_hit(struct hist_entry *he, u64 ip)
{
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
unsigned int sym_size, offset;
struct symbol *sym = he->sym;
struct sym_priv *priv;
struct sym_hist *h;
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
he->count++;
if (!sym || !he->map)
return 0;
priv = symbol__priv(sym);
if (priv->hist == NULL && sym__alloc_hist(sym) < 0)
return -ENOMEM;
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
sym_size = sym->end - sym->start;
offset = ip - sym->start;
pr_debug3("%s: ip=%#Lx\n", __func__, he->map->unmap_ip(he->map, ip));
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
if (offset >= sym_size)
return 0;
h = priv->hist;
h->sum++;
h->ip[offset]++;
pr_debug3("%#Lx %s: count++ [ip: %#Lx, %#Lx] => %Ld\n", he->sym->start,
he->sym->name, ip, ip - he->sym->start, h->ip[offset]);
return 0;
}
static int perf_session__add_hist_entry(struct perf_session *self,
struct addr_location *al, u64 count)
{
bool hit;
struct hist_entry *he;
if (sym_hist_filter != NULL &&
(al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) {
/* We're only interested in a symbol named sym_hist_filter */
if (al->sym != NULL) {
rb_erase(&al->sym->rb_node,
&al->map->dso->symbols[al->map->type]);
symbol__delete(al->sym);
}
return 0;
}
he = __perf_session__add_hist_entry(self, al, NULL, count, &hit);
if (he == NULL)
return -ENOMEM;
return annotate__hist_hit(he, al->addr);
}
static int process_sample_event(event_t *event, struct perf_session *session)
{
perf tools: Consolidate symbol resolving across all tools Now we have a very high level routine for simple tools to process IP sample events: int event__preprocess_sample(const event_t *self, struct addr_location *al, symbol_filter_t filter) It receives the event itself and will insert new threads in the global threads list and resolve the map and symbol, filling all this info into the new addr_location struct, so that tools like annotate and report can further process the event by creating hist_entries in their specific way (with or without callgraphs, etc). It in turn uses the new next layer function: void thread__find_addr_location(struct thread *self, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al, symbol_filter_t filter) This one will, given a thread (userspace or the kernel kthread one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE too in the near future) at the given cpumode, taking vdsos into account (userspace hit, but kernel symbol) and will fill all these details in the addr_location given. Tools that need a more compact API for plain function resolution, like 'kmem', can use this other one: struct symbol *thread__find_function(struct thread *self, u64 addr, symbol_filter_t filter) So, to resolve a kernel symbol, that is all the 'kmem' tool needs, its just a matter of calling: sym = thread__find_function(kthread, addr, NULL); The 'filter' parameter is needed because we do lazy parsing/loading of ELF symtabs or /proc/kallsyms. With this we remove more code duplication all around, which is always good, huh? :-) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: John Kacur <jkacur@redhat.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-28 02:29:23 +08:00
struct addr_location al;
dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc,
event->ip.pid, event->ip.ip);
if (event__preprocess_sample(event, session, &al, NULL) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
}
if (!al.filtered && perf_session__add_hist_entry(session, &al, 1)) {
pr_warning("problem incrementing symbol count, "
"skipping event\n");
return -1;
}
return 0;
}
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
struct objdump_line {
struct list_head node;
s64 offset;
char *line;
};
static struct objdump_line *objdump_line__new(s64 offset, char *line)
{
struct objdump_line *self = malloc(sizeof(*self));
if (self != NULL) {
self->offset = offset;
self->line = line;
}
return self;
}
static void objdump_line__free(struct objdump_line *self)
{
free(self->line);
free(self);
}
static void objdump__add_line(struct list_head *head, struct objdump_line *line)
{
list_add_tail(&line->node, head);
}
static struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
struct objdump_line *pos)
{
list_for_each_entry_continue(pos, head, node)
if (pos->offset >= 0)
return pos;
return NULL;
}
static int parse_line(FILE *file, struct hist_entry *he,
struct list_head *head)
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
{
struct symbol *sym = he->sym;
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
struct objdump_line *objdump_line;
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
char *line = NULL, *tmp, *tmp2;
size_t line_len;
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
s64 line_ip, offset = -1;
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
char *c;
if (getline(&line, &line_len, file) < 0)
return -1;
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
if (!line)
return -1;
c = strchr(line, '\n');
if (c)
*c = 0;
line_ip = -1;
/*
* Strip leading spaces:
*/
tmp = line;
while (*tmp) {
if (*tmp != ' ')
break;
tmp++;
}
if (*tmp) {
/*
* Parse hexa addresses followed by ':'
*/
line_ip = strtoull(tmp, &tmp2, 16);
if (*tmp2 != ':')
line_ip = -1;
}
if (line_ip != -1) {
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
u64 start = map__rip_2objdump(he->map, sym->start);
offset = line_ip - start;
}
objdump_line = objdump_line__new(offset, line);
if (objdump_line == NULL) {
free(line);
return -1;
}
objdump__add_line(head, objdump_line);
return 0;
}
static int objdump_line__print(struct objdump_line *self,
struct list_head *head,
struct hist_entry *he, u64 len)
{
struct symbol *sym = he->sym;
static const char *prev_line;
static const char *prev_color;
if (self->offset != -1) {
const char *path = NULL;
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
unsigned int hits = 0;
double percent = 0.0;
perf: Enable more compiler warnings Related to a shadowed variable bug fix Valdis Kletnieks noticed that perf does not get built with -Wshadow, which could have helped us avoid the bug. So enable -Wshadow and also enable the following warnings on perf builds, in addition to the already enabled -Wall -Wextra -std=gnu99 warnings: -Wcast-align -Wformat=2 -Wshadow -Winit-self -Wpacked -Wredundant-decls -Wstack-protector -Wstrict-aliasing=3 -Wswitch-default -Wswitch-enum -Wno-system-headers -Wundef -Wvolatile-register-var -Wwrite-strings -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement And change/fix the perf code to build cleanly under GCC 4.3.2. The list of warnings enablement is rather arbitrary: it's based on my (quick) reading of the GCC manpages and trying them on perf. I categorized the warnings based on individually enabling them and looking whether they trigger something in the perf build. If i liked those warnings (i.e. if they trigger for something that arguably could be improved) i enabled the warning. If the warnings seemed to come from language laywers spamming the build with tons of nuisance warnings i generally kept them off. Most of the sign conversion related warnings were in this category. (A second patch enabling some of the sign warnings might be welcome - sign bugs can be nasty.) I also kept warnings that seem to make sense from their manpage description and which produced no actual warnings on our code base. These warnings might still be turned off if they end up being a nuisance. I also left out a few warnings that are not supported in older compilers. [ Note that these changes might break the build on older compilers i did not test, or on non-x86 architectures that produce different warnings, so more testing would be welcome. ] Reported-by: Valdis.Kletnieks@vt.edu Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-08-15 18:26:57 +08:00
const char *color;
struct sym_priv *priv = symbol__priv(sym);
struct sym_ext *sym_ext = priv->ext;
struct sym_hist *h = priv->hist;
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
s64 offset = self->offset;
struct objdump_line *next = objdump__get_next_ip_line(head, self);
while (offset < (s64)len &&
(next == NULL || offset < next->offset)) {
if (sym_ext) {
if (path == NULL)
path = sym_ext[offset].path;
percent += sym_ext[offset].percent;
} else
hits += h->ip[offset];
++offset;
}
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
if (sym_ext == NULL && h->sum)
percent = 100.0 * hits / h->sum;
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
color = get_percent_color(percent);
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
/*
* Also color the filename and line if needed, with
* the same color than the percentage. Don't print it
* twice for close colored ip with the same filename:line
*/
if (path) {
if (!prev_line || strcmp(prev_line, path)
|| color != prev_color) {
color_fprintf(stdout, color, " %s", path);
prev_line = path;
prev_color = color;
}
}
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
color_fprintf(stdout, color, " %7.2f", percent);
printf(" : ");
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", self->line);
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
} else {
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
if (!*self->line)
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
printf(" :\n");
else
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
printf(" : %s\n", self->line);
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
}
return 0;
}
perf annotate: Print a sorted summary of annotated overhead lines It's can be very annoying to scroll down perf annotated output until we find relevant overhead. Using the -l option, you can now have a small summary sorted per overhead in the beginning of the output. Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overhead over 0.5% are summarized. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-13 06:11:22 +08:00
static struct rb_root root_sym_ext;
static void insert_source_line(struct sym_ext *sym_ext)
{
struct sym_ext *iter;
struct rb_node **p = &root_sym_ext.rb_node;
struct rb_node *parent = NULL;
while (*p != NULL) {
parent = *p;
iter = rb_entry(parent, struct sym_ext, node);
if (sym_ext->percent > iter->percent)
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
}
rb_link_node(&sym_ext->node, parent, p);
rb_insert_color(&sym_ext->node, &root_sym_ext);
}
static void free_source_line(struct hist_entry *he, int len)
{
struct sym_priv *priv = symbol__priv(he->sym);
struct sym_ext *sym_ext = priv->ext;
int i;
if (!sym_ext)
return;
for (i = 0; i < len; i++)
free(sym_ext[i].path);
free(sym_ext);
priv->ext = NULL;
perf annotate: Print a sorted summary of annotated overhead lines It's can be very annoying to scroll down perf annotated output until we find relevant overhead. Using the -l option, you can now have a small summary sorted per overhead in the beginning of the output. Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overhead over 0.5% are summarized. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-13 06:11:22 +08:00
root_sym_ext = RB_ROOT;
}
/* Get the filename:line for the colored entries */
static void
get_source_line(struct hist_entry *he, int len, const char *filename)
{
struct symbol *sym = he->sym;
u64 start;
int i;
char cmd[PATH_MAX * 2];
struct sym_ext *sym_ext;
struct sym_priv *priv = symbol__priv(sym);
struct sym_hist *h = priv->hist;
if (!h->sum)
return;
sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext));
if (!priv->ext)
return;
start = he->map->unmap_ip(he->map, sym->start);
for (i = 0; i < len; i++) {
char *path = NULL;
size_t line_len;
u64 offset;
FILE *fp;
sym_ext[i].percent = 100.0 * h->ip[i] / h->sum;
if (sym_ext[i].percent <= 0.5)
continue;
offset = start + i;
sprintf(cmd, "addr2line -e %s %016llx", filename, offset);
fp = popen(cmd, "r");
if (!fp)
continue;
if (getline(&path, &line_len, fp) < 0 || !line_len)
goto next;
sym_ext[i].path = malloc(sizeof(char) * line_len + 1);
if (!sym_ext[i].path)
goto next;
strcpy(sym_ext[i].path, path);
perf annotate: Print a sorted summary of annotated overhead lines It's can be very annoying to scroll down perf annotated output until we find relevant overhead. Using the -l option, you can now have a small summary sorted per overhead in the beginning of the output. Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overhead over 0.5% are summarized. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-13 06:11:22 +08:00
insert_source_line(&sym_ext[i]);
next:
pclose(fp);
}
}
perf: Enable more compiler warnings Related to a shadowed variable bug fix Valdis Kletnieks noticed that perf does not get built with -Wshadow, which could have helped us avoid the bug. So enable -Wshadow and also enable the following warnings on perf builds, in addition to the already enabled -Wall -Wextra -std=gnu99 warnings: -Wcast-align -Wformat=2 -Wshadow -Winit-self -Wpacked -Wredundant-decls -Wstack-protector -Wstrict-aliasing=3 -Wswitch-default -Wswitch-enum -Wno-system-headers -Wundef -Wvolatile-register-var -Wwrite-strings -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement And change/fix the perf code to build cleanly under GCC 4.3.2. The list of warnings enablement is rather arbitrary: it's based on my (quick) reading of the GCC manpages and trying them on perf. I categorized the warnings based on individually enabling them and looking whether they trigger something in the perf build. If i liked those warnings (i.e. if they trigger for something that arguably could be improved) i enabled the warning. If the warnings seemed to come from language laywers spamming the build with tons of nuisance warnings i generally kept them off. Most of the sign conversion related warnings were in this category. (A second patch enabling some of the sign warnings might be welcome - sign bugs can be nasty.) I also kept warnings that seem to make sense from their manpage description and which produced no actual warnings on our code base. These warnings might still be turned off if they end up being a nuisance. I also left out a few warnings that are not supported in older compilers. [ Note that these changes might break the build on older compilers i did not test, or on non-x86 architectures that produce different warnings, so more testing would be welcome. ] Reported-by: Valdis.Kletnieks@vt.edu Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-08-15 18:26:57 +08:00
static void print_summary(const char *filename)
perf annotate: Print a sorted summary of annotated overhead lines It's can be very annoying to scroll down perf annotated output until we find relevant overhead. Using the -l option, you can now have a small summary sorted per overhead in the beginning of the output. Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overhead over 0.5% are summarized. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-13 06:11:22 +08:00
{
struct sym_ext *sym_ext;
struct rb_node *node;
printf("\nSorted summary for file %s\n", filename);
printf("----------------------------------------------\n\n");
if (RB_EMPTY_ROOT(&root_sym_ext)) {
printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
return;
}
node = rb_first(&root_sym_ext);
while (node) {
double percent;
perf: Enable more compiler warnings Related to a shadowed variable bug fix Valdis Kletnieks noticed that perf does not get built with -Wshadow, which could have helped us avoid the bug. So enable -Wshadow and also enable the following warnings on perf builds, in addition to the already enabled -Wall -Wextra -std=gnu99 warnings: -Wcast-align -Wformat=2 -Wshadow -Winit-self -Wpacked -Wredundant-decls -Wstack-protector -Wstrict-aliasing=3 -Wswitch-default -Wswitch-enum -Wno-system-headers -Wundef -Wvolatile-register-var -Wwrite-strings -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement And change/fix the perf code to build cleanly under GCC 4.3.2. The list of warnings enablement is rather arbitrary: it's based on my (quick) reading of the GCC manpages and trying them on perf. I categorized the warnings based on individually enabling them and looking whether they trigger something in the perf build. If i liked those warnings (i.e. if they trigger for something that arguably could be improved) i enabled the warning. If the warnings seemed to come from language laywers spamming the build with tons of nuisance warnings i generally kept them off. Most of the sign conversion related warnings were in this category. (A second patch enabling some of the sign warnings might be welcome - sign bugs can be nasty.) I also kept warnings that seem to make sense from their manpage description and which produced no actual warnings on our code base. These warnings might still be turned off if they end up being a nuisance. I also left out a few warnings that are not supported in older compilers. [ Note that these changes might break the build on older compilers i did not test, or on non-x86 architectures that produce different warnings, so more testing would be welcome. ] Reported-by: Valdis.Kletnieks@vt.edu Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-08-15 18:26:57 +08:00
const char *color;
perf annotate: Print a sorted summary of annotated overhead lines It's can be very annoying to scroll down perf annotated output until we find relevant overhead. Using the -l option, you can now have a small summary sorted per overhead in the beginning of the output. Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overhead over 0.5% are summarized. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-13 06:11:22 +08:00
char *path;
sym_ext = rb_entry(node, struct sym_ext, node);
percent = sym_ext->percent;
color = get_percent_color(percent);
perf annotate: Print a sorted summary of annotated overhead lines It's can be very annoying to scroll down perf annotated output until we find relevant overhead. Using the -l option, you can now have a small summary sorted per overhead in the beginning of the output. Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overhead over 0.5% are summarized. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-13 06:11:22 +08:00
path = sym_ext->path;
color_fprintf(stdout, color, " %7.2f %s", percent, path);
node = rb_next(node);
}
}
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
static void hist_entry__print_hits(struct hist_entry *self)
{
struct symbol *sym = self->sym;
struct sym_priv *priv = symbol__priv(sym);
struct sym_hist *h = priv->hist;
u64 len = sym->end - sym->start, offset;
for (offset = 0; offset < len; ++offset)
if (h->ip[offset] != 0)
printf("%*Lx: %Lu\n", BITS_PER_LONG / 2,
sym->start + offset, h->ip[offset]);
printf("%*s: %Lu\n", BITS_PER_LONG / 2, "h->sum", h->sum);
}
static void annotate_sym(struct hist_entry *he)
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
{
struct map *map = he->map;
struct dso *dso = map->dso;
struct symbol *sym = he->sym;
perf tools: Rewrite and improve support for kernel modules Representing modules as struct map entries, backed by a DSO, etc, using /proc/modules to find where the module is loaded. DSOs now can have a short and long name, so that in verbose mode we can show exactly which .ko or vmlinux image was used. As kernel modules now are a DSO separate from the kernel, we can ask for just the hits for a particular set of kernel modules, just like we can do with shared libraries: [root@doppio linux-2.6-tip]# perf report -n --vmlinux /home/acme/git/build/tip-recvmmsg/vmlinux --modules --dsos \[drm\] | head -15 84.58% 13266 Xorg [k] drm_clflush_pages 4.02% 630 Xorg [k] trace_kmalloc.clone.0 3.95% 619 Xorg [k] drm_ioctl 2.07% 324 Xorg [k] drm_addbufs 1.68% 263 Xorg [k] drm_gem_close_ioctl 0.77% 120 Xorg [k] drm_setmaster_ioctl 0.70% 110 Xorg [k] drm_lastclose 0.68% 106 Xorg [k] drm_open 0.54% 85 Xorg [k] drm_mm_search_free [root@doppio linux-2.6-tip]# Specifying --dsos /lib/modules/2.6.31-tip/kernel/drivers/gpu/drm/drm.ko would have the same effect. Allowing specifying just 'drm.ko' is left for another patch. Processing kallsyms so that per kernel module struct map are instantiated was also left for another patch. That will allow removing the module name from each of its symbols. struct symbol was reduced by removing the ->module backpointer and moving it (well now the map) to struct symbol_entry in perf top, that is its only user right now. The total linecount went down by ~500 lines. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Avi Kivity <avi@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-02 14:29:58 +08:00
const char *filename = dso->long_name, *d_filename;
u64 len;
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
char command[PATH_MAX*2];
FILE *file;
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
LIST_HEAD(head);
struct objdump_line *pos, *n;
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
if (!filename)
return;
perf tools: Rewrite and improve support for kernel modules Representing modules as struct map entries, backed by a DSO, etc, using /proc/modules to find where the module is loaded. DSOs now can have a short and long name, so that in verbose mode we can show exactly which .ko or vmlinux image was used. As kernel modules now are a DSO separate from the kernel, we can ask for just the hits for a particular set of kernel modules, just like we can do with shared libraries: [root@doppio linux-2.6-tip]# perf report -n --vmlinux /home/acme/git/build/tip-recvmmsg/vmlinux --modules --dsos \[drm\] | head -15 84.58% 13266 Xorg [k] drm_clflush_pages 4.02% 630 Xorg [k] trace_kmalloc.clone.0 3.95% 619 Xorg [k] drm_ioctl 2.07% 324 Xorg [k] drm_addbufs 1.68% 263 Xorg [k] drm_gem_close_ioctl 0.77% 120 Xorg [k] drm_setmaster_ioctl 0.70% 110 Xorg [k] drm_lastclose 0.68% 106 Xorg [k] drm_open 0.54% 85 Xorg [k] drm_mm_search_free [root@doppio linux-2.6-tip]# Specifying --dsos /lib/modules/2.6.31-tip/kernel/drivers/gpu/drm/drm.ko would have the same effect. Allowing specifying just 'drm.ko' is left for another patch. Processing kallsyms so that per kernel module struct map are instantiated was also left for another patch. That will allow removing the module name from each of its symbols. struct symbol was reduced by removing the ->module backpointer and moving it (well now the map) to struct symbol_entry in perf top, that is its only user right now. The total linecount went down by ~500 lines. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Avi Kivity <avi@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-02 14:29:58 +08:00
pr_debug("%s: filename=%s, sym=%s, start=%#Lx, end=%#Lx\n", __func__,
filename, sym->name, map->unmap_ip(map, sym->start),
map->unmap_ip(map, sym->end));
if (full_paths)
d_filename = filename;
else
d_filename = basename(filename);
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
len = sym->end - sym->start;
perf annotate: Print a sorted summary of annotated overhead lines It's can be very annoying to scroll down perf annotated output until we find relevant overhead. Using the -l option, you can now have a small summary sorted per overhead in the beginning of the output. Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overhead over 0.5% are summarized. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-13 06:11:22 +08:00
if (print_line) {
get_source_line(he, len, filename);
perf annotate: Print a sorted summary of annotated overhead lines It's can be very annoying to scroll down perf annotated output until we find relevant overhead. Using the -l option, you can now have a small summary sorted per overhead in the beginning of the output. Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overhead over 0.5% are summarized. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-13 06:11:22 +08:00
print_summary(filename);
}
printf("\n\n------------------------------------------------\n");
printf(" Percent | Source code & Disassembly of %s\n", d_filename);
perf annotate: Print a sorted summary of annotated overhead lines It's can be very annoying to scroll down perf annotated output until we find relevant overhead. Using the -l option, you can now have a small summary sorted per overhead in the beginning of the output. Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overhead over 0.5% are summarized. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-13 06:11:22 +08:00
printf("------------------------------------------------\n");
if (verbose >= 2)
perf tools: Rewrite and improve support for kernel modules Representing modules as struct map entries, backed by a DSO, etc, using /proc/modules to find where the module is loaded. DSOs now can have a short and long name, so that in verbose mode we can show exactly which .ko or vmlinux image was used. As kernel modules now are a DSO separate from the kernel, we can ask for just the hits for a particular set of kernel modules, just like we can do with shared libraries: [root@doppio linux-2.6-tip]# perf report -n --vmlinux /home/acme/git/build/tip-recvmmsg/vmlinux --modules --dsos \[drm\] | head -15 84.58% 13266 Xorg [k] drm_clflush_pages 4.02% 630 Xorg [k] trace_kmalloc.clone.0 3.95% 619 Xorg [k] drm_ioctl 2.07% 324 Xorg [k] drm_addbufs 1.68% 263 Xorg [k] drm_gem_close_ioctl 0.77% 120 Xorg [k] drm_setmaster_ioctl 0.70% 110 Xorg [k] drm_lastclose 0.68% 106 Xorg [k] drm_open 0.54% 85 Xorg [k] drm_mm_search_free [root@doppio linux-2.6-tip]# Specifying --dsos /lib/modules/2.6.31-tip/kernel/drivers/gpu/drm/drm.ko would have the same effect. Allowing specifying just 'drm.ko' is left for another patch. Processing kallsyms so that per kernel module struct map are instantiated was also left for another patch. That will allow removing the module name from each of its symbols. struct symbol was reduced by removing the ->module backpointer and moving it (well now the map) to struct symbol_entry in perf top, that is its only user right now. The total linecount went down by ~500 lines. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Avi Kivity <avi@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-02 14:29:58 +08:00
printf("annotating [%p] %30s : [%p] %30s\n",
dso, dso->long_name, sym, sym->name);
sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s",
perf annotate: Fix it for non-prelinked *.so The problem was we were incorrectly calculating objdump addresses for sym->start and sym->end, look: For simple ET_DYN type DSO (*.so) with one function, objdump -dS output is something like this: 000004ac <my_strlen>: int my_strlen(const char *s) 4ac: 55 push %ebp 4ad: 89 e5 mov %esp,%ebp 4af: 83 ec 10 sub $0x10,%esp { i.e. we have relative-to-dso-mapping IPs (=RIP) there. For ET_EXEC type and probably for prelinked libs as well (sorry can't test - I don't use prelink) objdump outputs absolute IPs, e.g. 08048604 <zz_strlen>: extern "C" int zz_strlen(const char *s) 8048604: 55 push %ebp 8048605: 89 e5 mov %esp,%ebp 8048607: 83 ec 10 sub $0x10,%esp { So, if sym->start is always relative to dso mapping(*), we'll have to unmap it for ET_EXEC like cases, and leave as is for ET_DYN cases. (*) and it is - we've explicitely made it relative. Look for adjust_symbols handling in dso__load_sym() Previously we were always unmapping sym->start and for ET_DYN dsos resulting addresses were wrong, and so objdump output was empty. The end result was that perf annotate output for symbols from non-prelinked *.so had always 0.00% percents only, which is wrong. To fix it, let's introduce a helper for converting rip to objdump address, and also let's document what map_ip() and unmap_ip() do -- I had to study sources for several hours to understand it. Signed-off-by: Kirill Smelkov <kirr@landau.phys.spbu.ru> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Mike Galbraith <efault@gmx.de> LKML-Reference: <1265223128-11786-8-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-04 02:52:07 +08:00
map__rip_2objdump(map, sym->start),
map__rip_2objdump(map, sym->end),
filename, filename);
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
if (verbose >= 3)
printf("doing: %s\n", command);
file = popen(command, "r");
if (!file)
return;
while (!feof(file)) {
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
if (parse_line(file, he, &head) < 0)
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
break;
}
pclose(file);
perf annotate: Handle samples not at objdump output addr boundaries Without this patch we get this for need_resched: [root@mica ~]# perf annotate need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 0.00 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 0.00 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# But from the 'perf report' result we know that there are hits for need_resched on a 4 way machine mostly doing nothing, so after adding code to show what is in each hist offset and collapsing IP hits for what happens between objdump lines we get, for the same perf.data file: [root@mica ~]# perf annotate -v need_resched ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ : : : Disassembly of section .text: : : ffffffff810095ed <need_resched>: : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095ed: 55 push %rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 52.78 : ffffffff810095ee: be 03 00 00 00 mov $0x3,%esi : : static inline struct thread_info *current_thread_info(void) : { : struct thread_info *ti; : ti = (void *)(percpu_read_stable(kernel_stack) + 0.00 : ffffffff810095f3: 65 48 8b 3c 25 48 b5 mov %gs:0xb548,%rdi 0.00 : ffffffff810095fa: 00 00 : return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); : } : : static inline int need_resched(void) : { 0.00 : ffffffff810095fc: 48 89 e5 mov %rsp,%rbp : return unlikely(test_thread_flag(TIF_NEED_RESCHED)); 9.72 : ffffffff810095ff: 48 81 ef d8 1f 00 00 sub $0x1fd8,%rdi 0.00 : ffffffff81009606: e8 9d ff ff ff callq ffffffff810095a8 <test_ti_thread_flag> : } 0.00 : ffffffff8100960b: c9 leaveq 0.00 : ffffffff8100960c: 85 c0 test %eax,%eax 37.50 : ffffffff8100960e: 0f 95 c0 setne %al 0.00 : ffffffff81009611: 0f b6 c0 movzbl %al,%eax : Disassembly of section .vsyscall_0: : Disassembly of section .vsyscall_fn: : Disassembly of section .vsyscall_1: : Disassembly of section .vsyscall_2: : Disassembly of section .init.text: : Disassembly of section .altinstr_replacement: : Disassembly of section .exit.text: [root@mica ~]# And now 'perf annotate -v', verbose mode, will show the hits per precise IP, so that one can make sense of the attribution to each objdumop line: [root@mica ~]# perf annotate -v need_resched Looking at the vmlinux_path (5 entries long) Using /lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux for symbols annotate_sym: filename=/lib/modules/2.6.33-rc8-tip-00784-g3471df5-dirty/build/vmlinux, sym=need_resched, start=0xffffffff810095ed, end=0xffffffff81009614 ------------------------------------------------ Percent | Source code & Disassembly of vmlinux ------------------------------------------------ ffffffff810095f1: 152 ffffffff81009603: 28 ffffffff8100960f: 55 ffffffff81009610: 53 h->sum: 288 <SNIP same annotation> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Miller <davem@davemloft.net> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1267194194-15670-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-02-26 22:23:14 +08:00
if (verbose)
hist_entry__print_hits(he);
list_for_each_entry_safe(pos, n, &head, node) {
objdump_line__print(pos, &head, he, len);
list_del(&pos->node);
objdump_line__free(pos);
}
perf annotate: Print a sorted summary of annotated overhead lines It's can be very annoying to scroll down perf annotated output until we find relevant overhead. Using the -l option, you can now have a small summary sorted per overhead in the beginning of the output. Example: ./perf annotate -l -k ../../vmlinux -s __lock_acquire Sorted summary for file ../../vmlinux ---------------------------------------------- 12.04 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 4.61 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 3.77 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1775 3.56 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 2.93 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 2.83 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2545 2.30 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2388 2.20 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 2.09 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:138 1.88 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2548 1.47 /home/fweisbec/linux/linux-2.6-tip/arch/x86/include/asm/irqflags.h:15 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2594 1.36 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:730 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1654 1.26 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1653 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:2592 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 1.15 /home/fweisbec/linux/linux-2.6-tip/kernel/lockdep.c:1740 [...] Only overhead over 0.5% are summarized. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1244844682-12928-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-13 06:11:22 +08:00
if (print_line)
free_source_line(he, len);
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
}
static void perf_session__find_annotations(struct perf_session *self)
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
{
struct rb_node *nd;
for (nd = rb_first(&self->hists); nd; nd = rb_next(nd)) {
struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
struct sym_priv *priv;
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
if (he->sym == NULL)
continue;
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
priv = symbol__priv(he->sym);
if (priv->hist == NULL)
continue;
annotate_sym(he);
/*
* Since we have a hist_entry per IP for the same symbol, free
* he->sym->hist to signal we already processed this symbol.
*/
free(priv->hist);
priv->hist = NULL;
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
}
}
static struct perf_event_ops event_ops = {
.sample = process_sample_event,
.mmap = event__process_mmap,
.comm = event__process_comm,
.fork = event__process_task,
};
static int __cmd_annotate(void)
{
int ret;
struct perf_session *session;
session = perf_session__new(input_name, O_RDONLY, force);
if (session == NULL)
return -ENOMEM;
ret = perf_session__process_events(session, &event_ops);
if (ret)
goto out_delete;
if (dump_trace) {
event__print_totals();
goto out_delete;
}
if (verbose > 3)
perf_session__fprintf(session, stdout);
if (verbose > 2)
dsos__fprintf(stdout);
perf_session__collapse_resort(session);
perf_session__output_resort(session, session->event_total[0]);
perf_session__find_annotations(session);
out_delete:
perf_session__delete(session);
return ret;
}
static const char * const annotate_usage[] = {
"perf annotate [<options>] <command>",
NULL
};
static const struct option options[] = {
OPT_STRING('i', "input", &input_name, "file",
"input file name"),
OPT_STRING('s', "symbol", &sym_hist_filter, "symbol",
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
"symbol to annotate"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
"load module symbols - WARNING: use only with -k and LIVE kernel"),
OPT_BOOLEAN('l', "print-line", &print_line,
"print matching source lines (may be slow)"),
OPT_BOOLEAN('P', "full-paths", &full_paths,
"Don't shorten the displayed pathnames"),
OPT_END()
};
int cmd_annotate(int argc, const char **argv, const char *prefix __used)
{
argc = parse_options(argc, argv, options, annotate_usage, 0);
symbol_conf.priv_size = sizeof(struct sym_priv);
symbol_conf.try_vmlinux_path = true;
if (symbol__init() < 0)
return -1;
setup_sorting(annotate_usage, options);
perf_counter tools: Add 'perf annotate' feature Add new perf sub-command to display annotated source code: $ perf annotate decode_tree_entry ------------------------------------------------ Percent | Source code & Disassembly of /home/mingo/git/git ------------------------------------------------ : : /home/mingo/git/git: file format elf64-x86-64 : : : Disassembly of section .text: : : 00000000004a0da0 <decode_tree_entry>: : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 3.82 : 4a0da0: 41 54 push %r12 : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx : *modep = mode; : return str; : } : : static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size) : { 0.00 : 4a0da6: 49 89 fc mov %rdi,%r12 0.00 : 4a0da9: 55 push %rbp 3.37 : 4a0daa: 53 push %rbx : const char *path; : unsigned int mode, len; : : if (size < 24 || buf[size - 21]) 0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1) 3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80> : static const char *get_mode(const char *str, unsigned int *modep) : { : unsigned char c; : unsigned int mode = 0; : : if (*str == ' ') 1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax 0.39 : 4a0db7: 3c 20 cmp $0x20,%al 0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80> : return NULL; : : while ((c = *str++) != ' ') { 0.06 : 4a0dbb: 89 c2 mov %eax,%edx : if (c < '0' || c > '7') 1.99 : 4a0dbd: 31 ed xor %ebp,%ebp : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx : if (c < '0' || c > '7') 0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax 0.17 : 4a0dc6: 3c 07 cmp $0x7,%al 0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37> 0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80> 0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax) 16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax 0.14 : 4a0dd3: 3c 07 cmp $0x7,%al 0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80> : return NULL; : mode = (mode << 3) + (c - '0'); 3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax : unsigned int mode = 0; : : if (*str == ' ') : return NULL; : : while ((c = *str++) != ' ') { 0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx 16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx : if (c < '0' || c > '7') : return NULL; : mode = (mode << 3) + (c - '0'); The first column is the percentage of samples that arrived on that particular line - relative to the total cost of the function. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 21:48:52 +08:00
if (argc) {
/*
* Special case: if there's an argument left then assume tha
* it's a symbol filter:
*/
if (argc > 1)
usage_with_options(annotate_usage, options);
sym_hist_filter = argv[0];
}
setup_pager();
if (field_sep && *field_sep == '.') {
pr_err("'.' is the only non valid --field-separator argument\n");
return -1;
}
return __cmd_annotate();
}