Merge tag 'perf-core-for-mingo-20160427' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- perf trace --pf maj/min/all works with --call-graph: (Arnaldo Carvalho de Melo)

  Tracing write syscalls and major page faults with callchains while starting
  firefox, limiting the stack to 5 frames:

 # perf trace -e write --pf maj --max-stack 5 firefox
   589.549 ( 0.014 ms): firefox/15377 write(fd: 4, buf: 0x7fff80acc898, count: 151) = 151
                                       [0xfaed] (/usr/lib64/libpthread-2.22.so)
                                       fire_glxtest_process+0x5c (/usr/lib64/firefox/libxul.so)
                                       InstallGdkErrorHandler+0x41 (/usr/lib64/firefox/libxul.so)
                                       XREMain::XRE_mainInit+0x12c (/usr/lib64/firefox/libxul.so)
                                       XREMain::XRE_main+0x1e4 (/usr/lib64/firefox/libxul.so)
   760.704 ( 0.000 ms): firefox/15332 majfault [gtk_tree_view_accessible_get_type+0x0] => /usr/lib64/libgtk-3.so.0.1800.9@0xa0850 (x.)
                                       gtk_tree_view_accessible_get_type+0x0 (/usr/lib64/libgtk-3.so.0.1800.9)
                                       gtk_tree_view_class_intern_init+0x1a54 (/usr/lib64/libgtk-3.so.0.1800.9)
                                       g_type_class_ref+0x6dd (/usr/lib64/libgobject-2.0.so.0.4600.2)
                                       [0x115378] (/usr/lib64/libgnutls.so.30.6.3)

  This automagically selects "--call-graph dwarf". Use "--call-graph fp" on systems
  where -fno-omit-frame-pointer was used to build the components of interest, to
  incur less overhead, or tune "--call-graph dwarf" appropriately; see 'perf record --help'.

- Add the /proc/sys/kernel/perf_event_max_stack sysctl, which defaults to the old
  hard-coded value of PERF_MAX_STACK_DEPTH (127). Raising it is useful for the huge
  callstacks of things like Groovy, Ruby, etc., and lowering it reduces overhead;
  upcoming work will allow setting this per event (Arnaldo Carvalho de Melo)

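  A minimal usage sketch (this shell session is illustrative, not part of the
  original pull request): read the limit, raise it, and note that writes fail
  with EBUSY while events with callchains enabled are in use:

   # cat /proc/sys/kernel/perf_event_max_stack
   127
   # echo 512 > /proc/sys/kernel/perf_event_max_stack
   # perf record -g sleep 60 &
   # echo 256 > /proc/sys/kernel/perf_event_max_stack
   -bash: echo: write error: Device or resource busy
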
- Make 'perf trace --min-stack' be honoured by --pf and --event (Arnaldo Carvalho de Melo)

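  For example (a hypothetical command line, output omitted): show only major
  page fault events whose resolved callchains are at least 16 entries deep:

   # perf trace --no-syscalls --pf maj --min-stack 16 firefox
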
- Make 'perf evlist -v' decode perf_event_attr->branch_sample_type (Arnaldo Carvalho de Melo)

   # perf record --call lbr usleep 1
   # perf evlist -v
   cycles:ppp: ... sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK, ...
            branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES
   #

- Clear the dummy entry's accumulated period, fixing 'perf top/report' output
  such as: (Kan Liang)

    4769.98%  0.01%  0.00%  0.01%  tchain_edit  [kernel] [k] update_fast_timekeeper

- System calls with pid_t arguments get them augmented with the COMM event
  more thoroughly:

  # trace -e perf_event_open perf stat -e cycles -p 15608
   6.876 ( 0.014 ms): perf_event_open(attr_uptr: 0x2ae20d8, pid: 15608 (hexchat), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3
   6.882 ( 0.005 ms): perf_event_open(attr_uptr: 0x2ae20d8, pid: 15639 (gmain), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
   6.889 ( 0.005 ms): perf_event_open(attr_uptr: 0x2ae20d8, pid: 15640 (gdbus), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5
                                                            ^^^^^^^^^^^^^^^^^^
   ^C

- Fix offline module name mismatch issue in 'perf probe' (Ravi Bangoria)

- Fix module probe issue if no dwarf support in 'perf probe' (Ravi Bangoria)

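  Both 'perf probe' fixes matter when probing a module specified by path rather
  than by name, as in this hypothetical invocation (module path and function
  are illustrative):

   # perf probe -m /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko ext4_sync_fs
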
Assorted fixes:

- Fix off-by-one in write_buildid() (Andrey Ryabinin)

- Fix segfault when printing callchains in 'perf script' (Chris Phlipot)

- Replace assignment with comparison on assert check in 'perf test' entry (Colin Ian King)

- Fix off-by-one comparison in intel-pt code (Colin Ian King)

- Close target file on error path in 'perf probe' (Masami Hiramatsu)

- Set default kprobe group name if not given in 'perf probe' (Masami Hiramatsu)

- Avoid partial perf_event_header reads (Wang Nan)

Infrastructure changes:

- Update x86's syscall_64.tbl copy, adding preadv2 & pwritev2 (Arnaldo Carvalho de Melo)

- Make the x86 clean quiet wrt syscall table removal (Jiri Olsa)

Cleanups:

- Simplify wrapper for LOCK_PI in 'perf bench futex' (Davidlohr Bueso)

- Remove duplicate const qualifier (Eric Engestrom)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit a8944c5bf8 (Ingo Molnar, 2016-04-27 17:02:24 +02:00)
49 changed files with 475 additions and 179 deletions

@@ -60,6 +60,7 @@ show up in /proc/sys/kernel:
- panic_on_warn
- perf_cpu_time_max_percent
- perf_event_paranoid
- perf_event_max_stack
- pid_max
- powersave-nap [ PPC only ]
- printk
@@ -654,6 +655,19 @@ users (without CAP_SYS_ADMIN). The default value is 1.
==============================================================
perf_event_max_stack:
Controls maximum number of stack frames to copy for (attr.sample_type &
PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
'perf record -g' or 'perf trace --call-graph fp'.
This can only be done when no events are in use that have callchains
enabled, otherwise writing to this file will return -EBUSY.
The default value is 127.
==============================================================
pid_max:
PID allocation wrap value. When the kernel's next PID value


@@ -75,7 +75,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
tail = (struct frame_tail __user *)regs->ARM_fp - 1;
while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
while ((entry->nr < sysctl_perf_event_max_stack) &&
tail && !((unsigned long)tail & 0x3))
tail = user_backtrace(tail, entry);
}


@@ -122,7 +122,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
tail = (struct frame_tail __user *)regs->regs[29];
while (entry->nr < PERF_MAX_STACK_DEPTH &&
while (entry->nr < sysctl_perf_event_max_stack &&
tail && !((unsigned long)tail & 0xf))
tail = user_backtrace(tail, entry);
} else {
@@ -132,7 +132,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
while ((entry->nr < sysctl_perf_event_max_stack) &&
tail && !((unsigned long)tail & 0x3))
tail = compat_user_backtrace(tail, entry);
#endif


@@ -65,7 +65,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
--frame;
while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
while ((entry->nr < sysctl_perf_event_max_stack) && frame)
frame = user_backtrace(frame, entry);
}


@@ -35,7 +35,7 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
addr = *sp++;
if (__kernel_text_address(addr)) {
perf_callchain_store(entry, addr);
if (entry->nr >= PERF_MAX_STACK_DEPTH)
if (entry->nr >= sysctl_perf_event_max_stack)
break;
}
}
@@ -59,7 +59,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
}
do {
perf_callchain_store(entry, pc);
if (entry->nr >= PERF_MAX_STACK_DEPTH)
if (entry->nr >= sysctl_perf_event_max_stack)
break;
pc = unwind_stack(current, &sp, pc, &ra);
} while (pc);


@@ -247,7 +247,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
sp = regs->gpr[1];
perf_callchain_store(entry, next_ip);
while (entry->nr < PERF_MAX_STACK_DEPTH) {
while (entry->nr < sysctl_perf_event_max_stack) {
fp = (unsigned long __user *) sp;
if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
return;
@@ -453,7 +453,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
sp = regs->gpr[1];
perf_callchain_store(entry, next_ip);
while (entry->nr < PERF_MAX_STACK_DEPTH) {
while (entry->nr < sysctl_perf_event_max_stack) {
fp = (unsigned int __user *) (unsigned long) sp;
if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
return;


@@ -1756,7 +1756,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
}
}
#endif
} while (entry->nr < PERF_MAX_STACK_DEPTH);
} while (entry->nr < sysctl_perf_event_max_stack);
}
static inline int
@@ -1790,7 +1790,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
pc = sf.callers_pc;
ufp = (unsigned long)sf.fp + STACK_BIAS;
perf_callchain_store(entry, pc);
} while (entry->nr < PERF_MAX_STACK_DEPTH);
} while (entry->nr < sysctl_perf_event_max_stack);
}
static void perf_callchain_user_32(struct perf_callchain_entry *entry,
@@ -1822,7 +1822,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
ufp = (unsigned long)sf.fp;
}
perf_callchain_store(entry, pc);
} while (entry->nr < PERF_MAX_STACK_DEPTH);
} while (entry->nr < sysctl_perf_event_max_stack);
}
void


@@ -2277,7 +2277,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
fp = compat_ptr(ss_base + regs->bp);
pagefault_disable();
while (entry->nr < PERF_MAX_STACK_DEPTH) {
while (entry->nr < sysctl_perf_event_max_stack) {
unsigned long bytes;
frame.next_frame = 0;
frame.return_address = 0;
@@ -2337,7 +2337,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
return;
pagefault_disable();
while (entry->nr < PERF_MAX_STACK_DEPTH) {
while (entry->nr < sysctl_perf_event_max_stack) {
unsigned long bytes;
frame.next_frame = NULL;
frame.return_address = 0;


@@ -332,14 +332,14 @@ static int callchain_trace(struct stackframe *frame, void *data)
void perf_callchain_kernel(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
xtensa_backtrace_kernel(regs, sysctl_perf_event_max_stack,
callchain_trace, NULL, entry);
}
void perf_callchain_user(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
xtensa_backtrace_user(regs, sysctl_perf_event_max_stack,
callchain_trace, entry);
}


@@ -58,7 +58,7 @@ struct perf_guest_info_callbacks {
struct perf_callchain_entry {
__u64 nr;
__u64 ip[PERF_MAX_STACK_DEPTH];
__u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
};
struct perf_raw_record {
@@ -993,9 +993,11 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
extern int get_callchain_buffers(void);
extern void put_callchain_buffers(void);
extern int sysctl_perf_event_max_stack;
static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
if (entry->nr < PERF_MAX_STACK_DEPTH) {
if (entry->nr < sysctl_perf_event_max_stack) {
entry->ip[entry->nr++] = ip;
return 0;
} else {
@@ -1017,6 +1019,8 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
int perf_event_max_stack_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
static inline bool perf_paranoid_tracepoint_raw(void)
{


@@ -66,7 +66,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
value_size < 8 || value_size % 8 ||
value_size / 8 > PERF_MAX_STACK_DEPTH)
value_size / 8 > sysctl_perf_event_max_stack)
return ERR_PTR(-EINVAL);
/* hash table size must be power of 2 */
@@ -124,8 +124,8 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
struct perf_callchain_entry *trace;
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
u32 max_depth = map->value_size / 8;
/* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */
u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth;
/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
u32 init_nr = sysctl_perf_event_max_stack - max_depth;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
u32 hash, id, trace_nr, trace_len;
bool user = flags & BPF_F_USER_STACK;
@@ -143,7 +143,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
return -EFAULT;
/* get_perf_callchain() guarantees that trace->nr >= init_nr
* and trace-nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth
* and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
*/
trace_nr = trace->nr - init_nr;


@@ -18,6 +18,14 @@ struct callchain_cpus_entries {
struct perf_callchain_entry *cpu_entries[0];
};
int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
static inline size_t perf_callchain_entry__sizeof(void)
{
return (sizeof(struct perf_callchain_entry) +
sizeof(__u64) * sysctl_perf_event_max_stack);
}
static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);
@@ -73,7 +81,7 @@ static int alloc_callchain_buffers(void)
if (!entries)
return -ENOMEM;
size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;
for_each_possible_cpu(cpu) {
entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
@@ -147,7 +155,8 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)
cpu = smp_processor_id();
return &entries->cpu_entries[cpu][*rctx];
return (((void *)entries->cpu_entries[cpu]) +
(*rctx * perf_callchain_entry__sizeof()));
}
static void
@@ -215,3 +224,25 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
return entry;
}
int perf_event_max_stack_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int new_value = sysctl_perf_event_max_stack, ret;
struct ctl_table new_table = *table;
new_table.data = &new_value;
ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
if (ret || !write)
return ret;
mutex_lock(&callchain_mutex);
if (atomic_read(&nr_callchain_events))
ret = -EBUSY;
else
sysctl_perf_event_max_stack = new_value;
mutex_unlock(&callchain_mutex);
return ret;
}


@@ -130,6 +130,9 @@ static int one_thousand = 1000;
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
#ifdef CONFIG_PERF_EVENTS
static int six_hundred_forty_kb = 640 * 1024;
#endif
/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
@@ -1144,6 +1147,15 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &one_hundred,
},
{
.procname = "perf_event_max_stack",
.data = NULL, /* filled in by handler */
.maxlen = sizeof(sysctl_perf_event_max_stack),
.mode = 0644,
.proc_handler = perf_event_max_stack_handler,
.extra1 = &zero,
.extra2 = &six_hundred_forty_kb,
},
#endif
#ifdef CONFIG_KMEMCHECK
{


@@ -137,7 +137,8 @@ libsubcmd_clean:
$(call descend,lib/subcmd,clean)
perf_clean:
$(call descend,$(@:_clean=),clean)
$(Q)mkdir -p $(PERF_O) .
$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= clean
selftests_clean:
$(call descend,testing/$(@:_clean=),clean)


@@ -351,6 +351,19 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
return err;
}
int procfs__read_str(const char *entry, char **buf, size_t *sizep)
{
char path[PATH_MAX];
const char *procfs = procfs__mountpoint();
if (!procfs)
return -1;
snprintf(path, sizeof(path), "%s/%s", procfs, entry);
return filename__read_str(path, buf, sizep);
}
int sysfs__read_ull(const char *entry, unsigned long long *value)
{
char path[PATH_MAX];


@@ -29,6 +29,8 @@ int filename__read_int(const char *filename, int *value);
int filename__read_ull(const char *filename, unsigned long long *value);
int filename__read_str(const char *filename, char **buf, size_t *sizep);
int procfs__read_str(const char *entry, char **buf, size_t *sizep);
int sysctl__read_int(const char *sysctl, int *value);
int sysfs__read_int(const char *entry, int *value);
int sysfs__read_ull(const char *entry, unsigned long long *value);


@@ -248,7 +248,7 @@ OPTIONS
Note that when using the --itrace option the synthesized callchain size
will override this value if the synthesized callchain size is bigger.
Default: 127
Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
-G::
--inverted::


@@ -267,7 +267,7 @@ include::itrace.txt[]
Note that when using the --itrace option the synthesized callchain size
will override this value if the synthesized callchain size is bigger.
Default: 127
Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
--ns::
Use 9 decimal places when displaying time (i.e. show the nanoseconds)


@@ -177,7 +177,7 @@ Default is to monitor all CPUS.
between information loss and faster processing especially for
workloads that can have a very long callchain stack.
Default: 127
Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
--ignore-callees=<regex>::
Ignore callees of the function(s) matching the given regex.


@@ -143,7 +143,7 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
Implies '--call-graph dwarf' when --call-graph not present on the
command line, on systems where DWARF unwinding was built in.
Default: 127
Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
--min-stack::
Set the stack depth limit when parsing the callchain, anything


@@ -24,6 +24,6 @@ $(header): $(sys)/syscall_64.tbl $(systbl)
$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
clean::
rm -f $(header)
$(call QUIET_CLEAN, x86) $(RM) $(header)
archheaders: $(header)


@@ -333,6 +333,8 @@
324 common membarrier sys_membarrier
325 common mlock2 sys_mlock2
326 common copy_file_range sys_copy_file_range
327 64 preadv2 sys_preadv2
328 64 pwritev2 sys_pwritev2
#
# x32-specific system call numbers start at 512 to avoid cache impact


@@ -83,7 +83,7 @@ static void *workerfn(void *arg)
do {
int ret;
again:
ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);
ret = futex_lock_pi(w->futex, NULL, futex_flag);
if (ret) { /* handle lock acquisition */
if (!silent)


@@ -57,13 +57,11 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
/**
* futex_lock_pi() - block on uaddr as a PI mutex
* @detect: whether (1) or not (0) to perform deadlock detection
*/
static inline int
futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
int opflags)
futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags)
{
return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags);
}
/**


@@ -6,6 +6,7 @@
* Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
*/
#include "debug.h"
#include "../perf.h"
#include "../util/util.h"
#include <subcmd/parse-options.h>
@@ -63,14 +64,16 @@ static struct perf_event_attr cycle_attr = {
.config = PERF_COUNT_HW_CPU_CYCLES
};
static void init_cycles(void)
static int init_cycles(void)
{
cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
if (cycles_fd < 0 && errno == ENOSYS)
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
else
BUG_ON(cycles_fd < 0);
if (cycles_fd < 0 && errno == ENOSYS) {
pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
return -1;
}
return cycles_fd;
}
static u64 get_cycles(void)
@@ -155,8 +158,13 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
argc = parse_options(argc, argv, options, info->usage, 0);
if (use_cycles)
init_cycles();
if (use_cycles) {
i = init_cycles();
if (i < 0) {
fprintf(stderr, "Failed to open cycles counter\n");
return i;
}
}
size = (size_t)perf_atoll((char *)size_str);
size_total = (double)size * nr_loops;


@@ -691,7 +691,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
.ordered_events = true,
.ordering_requires_timestamps = true,
},
.max_stack = PERF_MAX_STACK_DEPTH,
.max_stack = sysctl_perf_event_max_stack,
.pretty_printing_style = "normal",
.socket_filter = -1,
};
@@ -744,7 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
"Default: " __stringify(PERF_MAX_STACK_DEPTH)),
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
"alias for inverted call graph"),
OPT_CALLBACK(0, "ignore-callees", NULL, "regex",


@@ -570,12 +570,12 @@ static void print_sample_bts(struct perf_sample *sample,
/* print branch_from information */
if (PRINT_FIELD(IP)) {
unsigned int print_opts = output[attr->type].print_ip_opts;
struct callchain_cursor *cursor = NULL, cursor_callchain;
struct callchain_cursor *cursor = NULL;
if (symbol_conf.use_callchain && sample->callchain &&
thread__resolve_callchain(al->thread, &cursor_callchain, evsel,
thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
sample, NULL, NULL, scripting_max_stack) == 0)
cursor = &cursor_callchain;
cursor = &callchain_cursor;
if (cursor == NULL) {
putchar(' ');
@@ -789,12 +789,12 @@ static void process_event(struct perf_script *script,
printf("%16" PRIu64, sample->weight);
if (PRINT_FIELD(IP)) {
struct callchain_cursor *cursor = NULL, cursor_callchain;
struct callchain_cursor *cursor = NULL;
if (symbol_conf.use_callchain && sample->callchain &&
thread__resolve_callchain(al->thread, &cursor_callchain, evsel,
thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
sample, NULL, NULL, scripting_max_stack) == 0)
cursor = &cursor_callchain;
cursor = &callchain_cursor;
putchar(cursor ? '\n' : ' ');
sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout);
@@ -2031,7 +2031,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
"Default: " __stringify(PERF_MAX_STACK_DEPTH)),
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_BOOLEAN('I', "show-info", &show_full_info,
"display extended information from perf.data file"),
OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
@@ -2067,6 +2067,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
NULL
};
scripting_max_stack = sysctl_perf_event_max_stack;
setup_scripting();
argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,


@@ -1103,7 +1103,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
},
.proc_map_timeout = 500,
},
.max_stack = PERF_MAX_STACK_DEPTH,
.max_stack = sysctl_perf_event_max_stack,
.sym_pcnt_filter = 5,
};
struct record_opts *opts = &top.record_opts;
@@ -1171,7 +1171,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
"Accumulate callchains of children and show total overhead as well"),
OPT_INTEGER(0, "max-stack", &top.max_stack,
"Set the maximum stack depth when parsing the callchain. "
"Default: " __stringify(PERF_MAX_STACK_DEPTH)),
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
"ignore callees of these functions in call graphs",
report_parse_ignore_callees_opt),


@@ -56,22 +56,6 @@
# define MSG_CMSG_CLOEXEC 0x40000000
#endif
#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#endif
#ifndef PERF_FLAG_FD_OUTPUT
# define PERF_FLAG_FD_OUTPUT (1UL << 1)
#endif
#ifndef PERF_FLAG_PID_CGROUP
# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif
#ifndef PERF_FLAG_FD_CLOEXEC
# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
#endif
struct trace {
struct perf_tool tool;
struct syscalltbl *sctbl;
@@ -674,34 +658,6 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
int printed = 0, flags = arg->val;
if (flags == 0)
return 0;
#define P_FLAG(n) \
if (flags & PERF_FLAG_##n) { \
printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
flags &= ~PERF_FLAG_##n; \
}
P_FLAG(FD_NO_GROUP);
P_FLAG(FD_OUTPUT);
P_FLAG(PID_CGROUP);
P_FLAG(FD_CLOEXEC);
#undef P_FLAG
if (flags)
printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
return printed;
}
#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -894,6 +850,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
#include "trace/beauty/pid.c"
#include "trace/beauty/mmap.c"
#include "trace/beauty/mode_t.c"
#include "trace/beauty/perf_event_open.c"
#include "trace/beauty/sched_policy.c"
#include "trace/beauty/socket_type.c"
#include "trace/beauty/waitid_options.c"
@@ -1086,8 +1043,7 @@ static struct syscall_fmt {
[1] = SCA_FILENAME, /* filename */
[2] = SCA_OPEN_FLAGS, /* flags */ }, },
{ .name = "perf_event_open", .errmsg = true,
.arg_scnprintf = { [1] = SCA_INT, /* pid */
[2] = SCA_INT, /* cpu */
.arg_scnprintf = { [2] = SCA_INT, /* cpu */
[3] = SCA_FD, /* group_fd */
[4] = SCA_PERF_FLAGS, /* flags */ }, },
{ .name = "pipe2", .errmsg = true,
@@ -2126,6 +2082,17 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
int callchain_ret = 0;
if (sample->callchain) {
callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
if (callchain_ret == 0) {
if (callchain_cursor.nr < trace->min_stack)
goto out;
callchain_ret = 1;
}
}
trace__printf_interrupted_entry(trace, sample);
trace__fprintf_tstamp(trace, sample->time, trace->output);
@@ -2144,11 +2111,11 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
fprintf(trace->output, ")\n");
if (sample->callchain) {
if (trace__resolve_callchain(trace, evsel, sample, &callchain_cursor) == 0)
trace__fprintf_callchain(trace, sample);
}
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
return 0;
}
@@ -2179,8 +2146,19 @@ static int trace__pgfault(struct trace *trace,
char map_type = 'd';
struct thread_trace *ttrace;
int err = -1;
int callchain_ret = 0;
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
if (sample->callchain) {
callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
if (callchain_ret == 0) {
if (callchain_cursor.nr < trace->min_stack)
goto out_put;
callchain_ret = 1;
}
}
ttrace = thread__trace(thread, trace->output);
if (ttrace == NULL)
goto out_put;
@@ -2222,6 +2200,11 @@ static int trace__pgfault(struct trace *trace,
print_location(trace->output, sample, &al, true, false);
fprintf(trace->output, " (%c%c)\n", map_type, al.level);
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
err = 0;
out_put:
@@ -2381,8 +2364,7 @@ static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
return true;
}
static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
u64 config)
static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
{
struct perf_evsel *evsel;
struct perf_event_attr attr = {
@@ -2396,13 +2378,10 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
event_attr_init(&attr);
evsel = perf_evsel__new(&attr);
if (!evsel)
return -ENOMEM;
if (evsel)
evsel->handler = trace__pgfault;
evsel->handler = trace__pgfault;
perf_evlist__add(evlist, evsel);
return 0;
return evsel;
}
static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
@@ -2504,7 +2483,7 @@ static int trace__set_ev_qualifier_filter(struct trace *trace)
static int trace__run(struct trace *trace, int argc, const char **argv)
{
struct perf_evlist *evlist = trace->evlist;
struct perf_evsel *evsel;
struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
int err = -1, i;
unsigned long before;
const bool forks = argc > 0;
@@ -2518,14 +2497,19 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
if (trace->trace_syscalls)
trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
goto out_error_mem;
if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
if (pgfault_maj == NULL)
goto out_error_mem;
perf_evlist__add(evlist, pgfault_maj);
}
if ((trace->trace_pgfaults & TRACE_PFMIN) &&
perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
goto out_error_mem;
if ((trace->trace_pgfaults & TRACE_PFMIN)) {
pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
if (pgfault_min == NULL)
goto out_error_mem;
perf_evlist__add(evlist, pgfault_min);
}
if (trace->sched &&
perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
@@ -2546,24 +2530,42 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
perf_evlist__config(evlist, &trace->opts, NULL);
if (callchain_param.enabled && trace->syscalls.events.sys_exit) {
perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
&trace->opts, &callchain_param);
/*
* Now we have evsels with different sample_ids, use
* PERF_SAMPLE_IDENTIFIER to map from sample to evsel
* from a fixed position in each ring buffer record.
*
* As of this the changeset introducing this comment, this
* isn't strictly needed, as the fields that can come before
* PERF_SAMPLE_ID are all used, but we'll probably disable
* some of those for things like copying the payload of
* pointer syscall arguments, and for vfs_getname we don't
* need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
* here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
*/
perf_evlist__set_sample_bit(evlist, IDENTIFIER);
perf_evlist__reset_sample_bit(evlist, ID);
if (callchain_param.enabled) {
bool use_identifier = false;
if (trace->syscalls.events.sys_exit) {
perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
&trace->opts, &callchain_param);
use_identifier = true;
}
if (pgfault_maj) {
perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
use_identifier = true;
}
if (pgfault_min) {
perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
use_identifier = true;
}
if (use_identifier) {
/*
* Now we have evsels with different sample_ids, use
* PERF_SAMPLE_IDENTIFIER to map from sample to evsel
* from a fixed position in each ring buffer record.
*
* As of this the changeset introducing this comment, this
* isn't strictly needed, as the fields that can come before
* PERF_SAMPLE_ID are all used, but we'll probably disable
* some of those for things like copying the payload of
* pointer syscall arguments, and for vfs_getname we don't
* need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
* here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
*/
perf_evlist__set_sample_bit(evlist, IDENTIFIER);
perf_evlist__reset_sample_bit(evlist, ID);
}
}
signal(SIGCHLD, sig_handler);
@@ -3104,7 +3106,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_UINTEGER(0, "max-stack", &trace.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
"Default: " __stringify(PERF_MAX_STACK_DEPTH)),
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_END()
@@ -3148,7 +3150,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
mmap_pages_user_set = false;
if (trace.max_stack == UINT_MAX) {
trace.max_stack = PERF_MAX_STACK_DEPTH;
trace.max_stack = sysctl_perf_event_max_stack;
max_stack_user_set = false;
}


@@ -17,6 +17,7 @@
#include <subcmd/parse-options.h>
#include "util/bpf-loader.h"
#include "util/debug.h"
#include <api/fs/fs.h>
#include <api/fs/tracing_path.h>
#include <pthread.h>
#include <stdlib.h>
@@ -533,6 +534,7 @@ int main(int argc, const char **argv)
{
const char *cmd;
char sbuf[STRERR_BUFSIZE];
int value;
/* libsubcmd init */
exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
@@ -542,6 +544,9 @@ int main(int argc, const char **argv)
page_size = sysconf(_SC_PAGE_SIZE);
cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
sysctl_perf_event_max_stack = value;
cmd = extract_argv0_path(argv[0]);
if (!cmd)
cmd = "perf-help";


@@ -30,7 +30,7 @@ static int process_event_scale(struct perf_tool *tool __maybe_unused,
TEST_ASSERT_VAL("wrong id", ev->id == 123);
TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE);
TEST_ASSERT_VAL("wrong scale", ev_data->scale = 0.123);
TEST_ASSERT_VAL("wrong scale", ev_data->scale == 0.123);
return 0;
}


@@ -101,7 +101,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
if (machine__resolve(machine, &al, &sample) < 0)
goto out;
if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
NULL) < 0) {
addr_location__put(&al);
goto out;


@@ -81,7 +81,7 @@ static int add_hist_entries(struct perf_evlist *evlist,
al.socket = fake_samples[i].socket;
if (hist_entry_iter__add(&iter, &al,
PERF_MAX_STACK_DEPTH, NULL) < 0) {
sysctl_perf_event_max_stack, NULL) < 0) {
addr_location__put(&al);
goto out;
}


@@ -67,7 +67,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
if (machine__resolve(machine, &al, &sample) < 0)
goto out;
if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
NULL) < 0) {
addr_location__put(&al);
goto out;


@@ -0,0 +1,43 @@
#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#endif
#ifndef PERF_FLAG_FD_OUTPUT
# define PERF_FLAG_FD_OUTPUT (1UL << 1)
#endif
#ifndef PERF_FLAG_PID_CGROUP
# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif
#ifndef PERF_FLAG_FD_CLOEXEC
# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
#endif
static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
int printed = 0, flags = arg->val;
if (flags == 0)
return 0;
#define P_FLAG(n) \
if (flags & PERF_FLAG_##n) { \
printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
flags &= ~PERF_FLAG_##n; \
}
P_FLAG(FD_NO_GROUP);
P_FLAG(FD_OUTPUT);
P_FLAG(PID_CGROUP);
P_FLAG(FD_CLOEXEC);
#undef P_FLAG
if (flags)
printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
return printed;
}
#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags


@@ -3,9 +3,12 @@ static size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_a
int pid = arg->val;
struct trace *trace = arg->trace;
size_t printed = scnprintf(bf, size, "%d", pid);
struct thread *thread = machine__find_thread(trace->host, pid, pid);
struct thread *thread = machine__findnew_thread(trace->host, pid, pid);
if (thread != NULL) {
if (!thread->comm_set)
thread__set_comm_from_proc(thread);
if (thread->comm_set)
printed += scnprintf(bf + printed, size - printed,
" (%s)", thread__comm_str(thread));


@@ -261,14 +261,14 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
if (dso__is_vdso(pos)) {
name = pos->short_name;
name_len = pos->short_name_len + 1;
name_len = pos->short_name_len;
} else if (dso__is_kcore(pos)) {
machine__mmap_name(machine, nm, sizeof(nm));
name = nm;
name_len = strlen(nm) + 1;
name_len = strlen(nm);
} else {
name = pos->long_name;
name_len = pos->long_name_len + 1;
name_len = pos->long_name_len;
}
in_kernel = pos->kernel ||


@@ -684,6 +684,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
struct perf_mmap *md = &evlist->mmap[idx];
u64 head;
u64 old = md->prev;
int diff;
unsigned char *data = md->base + page_size;
union perf_event *event = NULL;
@@ -694,6 +695,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
return NULL;
head = perf_mmap__read_head(md);
diff = head - old;
if (evlist->overwrite) {
/*
* If we're further behind than half the buffer, there's a chance
@@ -703,7 +705,6 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
*
* In either case, truncate and restart at head.
*/
int diff = head - old;
if (diff > md->mask / 2 || diff < 0) {
fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
@@ -711,15 +712,21 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
* head points to a known good entry, start there.
*/
old = head;
diff = 0;
}
}
if (old != head) {
if (diff >= (int)sizeof(event->header)) {
size_t size;
event = (union perf_event *)&data[old & md->mask];
size = event->header.size;
if (size < sizeof(event->header) || diff < (int)size) {
event = NULL;
goto broken_event;
}
/*
* Event straddles the mmap boundary -- header should always
* be inside due to u64 alignment of output.
@@ -743,6 +750,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
old += size;
}
broken_event:
md->prev = old;
return event;


@@ -1231,6 +1231,21 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
__p_bits(buf, size, value, bits);
}
static void __p_branch_sample_type(char *buf, size_t size, u64 value)
{
#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
struct bit_names bits[] = {
bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
{ .name = NULL, }
};
#undef bit_name
__p_bits(buf, size, value, bits);
}
static void __p_read_format(char *buf, size_t size, u64 value)
{
#define bit_name(n) { PERF_FORMAT_##n, #n }
@@ -1249,6 +1264,7 @@ static void __p_read_format(char *buf, size_t size, u64 value)
#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val)
#define PRINT_ATTRn(_n, _f, _p) \
@@ -1305,7 +1321,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(bp_type, p_unsigned);
PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
PRINT_ATTRf(branch_sample_type, p_unsigned);
PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
PRINT_ATTRf(sample_regs_user, p_hex);
PRINT_ATTRf(sample_stack_user, p_unsigned);
PRINT_ATTRf(clockid, p_signed);


@@ -2062,6 +2062,8 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
if (he) {
memset(&he->stat, 0, sizeof(he->stat));
he->hists = hists;
if (symbol_conf.cumulate_callchain)
memset(he->stat_acc, 0, sizeof(he->stat));
rb_link_node(&he->rb_node_in, parent, p);
rb_insert_color(&he->rb_node_in, root);
hists__inc_stats(hists, he);


@@ -356,7 +356,7 @@ static const char *intel_pt_err_msgs[] = {
int intel_pt__strerror(int code, char *buf, size_t buflen)
{
if (code < 1 || code > INTEL_PT_ERR_MAX)
if (code < 1 || code >= INTEL_PT_ERR_MAX)
code = INTEL_PT_ERR_UNK;
strlcpy(buf, intel_pt_err_msgs[code], buflen);
return 0;


@@ -1764,7 +1764,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
*/
int mix_chain_nr = i + 1 + lbr_nr + 1;
if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) {
if (mix_chain_nr > (int)sysctl_perf_event_max_stack + PERF_MAX_BRANCH_DEPTH) {
pr_warning("corrupted callchain. skipping...\n");
return 0;
}
@@ -1825,7 +1825,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
* Based on DWARF debug information, some architectures skip
* a callchain entry saved by the kernel.
*/
if (chain->nr < PERF_MAX_STACK_DEPTH)
if (chain->nr < sysctl_perf_event_max_stack)
skip_idx = arch_skip_callchain_idx(thread, chain);
/*
@@ -1886,7 +1886,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
}
check_calls:
if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) {
if (chain->nr > sysctl_perf_event_max_stack && (int)chain->nr > max_stack) {
pr_warning("corrupted callchain. skipping...\n");
return 0;
}


@@ -265,6 +265,65 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
return true;
}
/*
* NOTE:
* '.gnu.linkonce.this_module' section of kernel module elf directly
* maps to 'struct module' from linux/module.h. This section contains
* actual module name which will be used by kernel after loading it.
* But, we cannot use 'struct module' here since linux/module.h is not
* exposed to user-space. Offset of 'name' has remained same from long
* time, so hardcoding it here.
*/
#ifdef __LP64__
#define MOD_NAME_OFFSET 24
#else
#define MOD_NAME_OFFSET 12
#endif
/*
* @module can be module name of module file path. In case of path,
* inspect elf and find out what is actual module name.
* Caller has to free mod_name after using it.
*/
static char *find_module_name(const char *module)
{
int fd;
Elf *elf;
GElf_Ehdr ehdr;
GElf_Shdr shdr;
Elf_Data *data;
Elf_Scn *sec;
char *mod_name = NULL;
fd = open(module, O_RDONLY);
if (fd < 0)
return NULL;
elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
if (elf == NULL)
goto elf_err;
if (gelf_getehdr(elf, &ehdr) == NULL)
goto ret_err;
sec = elf_section_by_name(elf, &ehdr, &shdr,
".gnu.linkonce.this_module", NULL);
if (!sec)
goto ret_err;
data = elf_getdata(sec, NULL);
if (!data || !data->d_buf)
goto ret_err;
mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET);
ret_err:
elf_end(elf);
elf_err:
close(fd);
return mod_name;
}
#ifdef HAVE_DWARF_SUPPORT
static int kernel_get_module_dso(const char *module, struct dso **pdso)
@@ -486,8 +545,10 @@ static int get_text_start_address(const char *exec, unsigned long *address)
return -errno;
elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
if (elf == NULL)
return -EINVAL;
if (elf == NULL) {
ret = -EINVAL;
goto out_close;
}
if (gelf_getehdr(elf, &ehdr) == NULL)
goto out;
@@ -499,6 +560,9 @@ static int get_text_start_address(const char *exec, unsigned long *address)
ret = 0;
out:
elf_end(elf);
out_close:
close(fd);
return ret;
}
@@ -583,32 +647,23 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
int ntevs, const char *module)
{
int i, ret = 0;
char *tmp;
char *mod_name = NULL;
if (!module)
return 0;
tmp = strrchr(module, '/');
if (tmp) {
/* This is a module path -- get the module name */
module = strdup(tmp + 1);
if (!module)
return -ENOMEM;
tmp = strchr(module, '.');
if (tmp)
*tmp = '\0';
tmp = (char *)module; /* For free() */
}
mod_name = find_module_name(module);
for (i = 0; i < ntevs; i++) {
tevs[i].point.module = strdup(module);
tevs[i].point.module =
strdup(mod_name ? mod_name : module);
if (!tevs[i].point.module) {
ret = -ENOMEM;
break;
}
}
free(tmp);
free(mod_name);
return ret;
}
@@ -2516,6 +2571,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
struct probe_trace_point *tp;
int num_matched_functions;
int ret, i, j, skipped = 0;
char *mod_name;
map = get_target_map(pev->target, pev->uprobes);
if (!map) {
@@ -2600,9 +2656,19 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
tp->realname = strdup_or_goto(sym->name, nomem_out);
tp->retprobe = pp->retprobe;
if (pev->target)
tev->point.module = strdup_or_goto(pev->target,
nomem_out);
if (pev->target) {
if (pev->uprobes) {
tev->point.module = strdup_or_goto(pev->target,
nomem_out);
} else {
mod_name = find_module_name(pev->target);
tev->point.module =
strdup(mod_name ? mod_name : pev->target);
free(mod_name);
if (!tev->point.module)
goto nomem_out;
}
}
tev->uprobes = pev->uprobes;
tev->nargs = pev->nargs;
if (tev->nargs) {
@@ -2743,9 +2809,13 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
{
int ret;
if (pev->uprobes && !pev->group) {
/* Replace group name if not given */
ret = convert_exec_to_group(pev->target, &pev->group);
if (!pev->group) {
/* Set group name if not given */
if (!pev->uprobes) {
pev->group = strdup(PERFPROBE_GROUP);
ret = pev->group ? 0 : -ENOMEM;
} else
ret = convert_exec_to_group(pev->target, &pev->group);
if (ret != 0) {
pr_warning("Failed to make a group name.\n");
return ret;


@@ -220,8 +220,7 @@ int probe_file__add_event(int fd, struct probe_trace_event *tev)
pr_debug("Writing event: %s\n", buf);
if (!probe_event_dry_run) {
ret = write(fd, buf, strlen(buf));
if (ret <= 0) {
if (write(fd, buf, strlen(buf)) < (int)strlen(buf)) {
ret = -errno;
pr_warning("Failed to write event: %s\n",
strerror_r(errno, sbuf, sizeof(sbuf)));


@@ -265,7 +265,7 @@ static SV *perl_process_callchain(struct perf_sample *sample,
if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
sample, NULL, NULL,
PERF_MAX_STACK_DEPTH) != 0) {
sysctl_perf_event_max_stack) != 0) {
pr_err("Failed to resolve callchain. Skipping\n");
goto exit;
}


@@ -10,6 +10,8 @@
#include "comm.h"
#include "unwind.h"
#include <api/fs/fs.h>
int thread__init_map_groups(struct thread *thread, struct machine *machine)
{
struct thread *leader;
@@ -153,6 +155,23 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
return 0;
}
int thread__set_comm_from_proc(struct thread *thread)
{
char path[64];
char *comm = NULL;
size_t sz;
int err = -1;
if (!(snprintf(path, sizeof(path), "%d/task/%d/comm",
thread->pid_, thread->tid) >= (int)sizeof(path)) &&
procfs__read_str(path, &comm, &sz) == 0) {
comm[sz - 1] = '\0';
err = thread__set_comm(thread, comm, 0);
}
return err;
}
const char *thread__comm_str(const struct thread *thread)
{
const struct comm *comm = thread__comm(thread);
@@ -233,7 +252,7 @@ void thread__find_cpumode_addr_location(struct thread *thread,
struct addr_location *al)
{
size_t i;
const u8 const cpumodes[] = {
const u8 cpumodes[] = {
PERF_RECORD_MISC_USER,
PERF_RECORD_MISC_KERNEL,
PERF_RECORD_MISC_GUEST_USER,


@@ -71,6 +71,8 @@ static inline int thread__set_comm(struct thread *thread, const char *comm,
return __thread__set_comm(thread, comm, timestamp, false);
}
int thread__set_comm_from_proc(struct thread *thread);
int thread__comm_len(struct thread *thread);
struct comm *thread__comm(const struct thread *thread);
struct comm *thread__exec_comm(const struct thread *thread);


@@ -33,6 +33,8 @@ struct callchain_param callchain_param = {
unsigned int page_size;
int cacheline_size;
unsigned int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
bool test_attr__enabled;
bool perf_host = true;
@@ -117,6 +119,40 @@ int rm_rf(char *path)
return rmdir(path);
}
/* A filter which removes dot files */
bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d)
{
return d->d_name[0] != '.';
}
/* lsdir reads a directory and store it in strlist */
struct strlist *lsdir(const char *name,
bool (*filter)(const char *, struct dirent *))
{
struct strlist *list = NULL;
DIR *dir;
struct dirent *d;
dir = opendir(name);
if (!dir)
return NULL;
list = strlist__new(NULL, NULL);
if (!list) {
errno = -ENOMEM;
goto out;
}
while ((d = readdir(dir)) != NULL) {
if (!filter || filter(name, d))
strlist__add(list, d->d_name);
}
out:
closedir(dir);
return list;
}
static int slow_copyfile(const char *from, const char *to)
{
int err = -1;


@@ -79,6 +79,7 @@
#include <termios.h>
#include <linux/bitops.h>
#include <termios.h>
#include "strlist.h"
extern const char *graph_line;
extern const char *graph_dotted_line;
@@ -222,6 +223,8 @@ static inline int sane_case(int x, int high)
int mkdir_p(char *path, mode_t mode);
int rm_rf(char *path);
struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
bool lsdir_no_dot_filter(const char *name, struct dirent *d);
int copyfile(const char *from, const char *to);
int copyfile_mode(const char *from, const char *to, mode_t mode);
int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size);
@@ -264,6 +267,7 @@ void sighandler_dump_stack(int sig);
extern unsigned int page_size;
extern int cacheline_size;
extern unsigned int sysctl_perf_event_max_stack;
struct parse_tag {
char tag;