linux/include/trace/events/sched.h

576 lines
14 KiB
C
Raw Normal View History

#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched
#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SCHED_H
#include <linux/sched/numa_balancing.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>
/*
* Tracepoint for calling kthread_stop, performed to end a kthread:
*/
TRACE_EVENT(sched_kthread_stop,
TP_PROTO(struct task_struct *t),
TP_ARGS(t),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
),
TP_fast_assign(
memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
__entry->pid = t->pid;
),
events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==<x>' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-15 17:50:39 +08:00
TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);
/*
* Tracepoint for the return value of the kthread stopping:
*/
TRACE_EVENT(sched_kthread_stop_ret,
TP_PROTO(int ret),
TP_ARGS(ret),
TP_STRUCT__entry(
__field( int, ret )
),
TP_fast_assign(
__entry->ret = ret;
),
events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==<x>' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-15 17:50:39 +08:00
TP_printk("ret=%d", __entry->ret)
);
/*
* Tracepoint for waking up a task:
*/
DECLARE_EVENT_CLASS(sched_wakeup_template,
TP_PROTO(struct task_struct *p),
TP_ARGS(__perf_task(p)),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, prio )
__field( int, success )
events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==<x>' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-15 17:50:39 +08:00
__field( int, target_cpu )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->prio = p->prio; /* XXX SCHED_DEADLINE */
__entry->success = 1; /* rudiment, kill when possible */
events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==<x>' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-15 17:50:39 +08:00
__entry->target_cpu = task_cpu(p);
),
TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
__entry->comm, __entry->pid, __entry->prio,
__entry->target_cpu)
);
/*
* Tracepoint called when waking a task; this tracepoint is guaranteed to be
* called from the waking context.
*/
DEFINE_EVENT(sched_wakeup_template, sched_waking,
TP_PROTO(struct task_struct *p),
TP_ARGS(p));
/*
* Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG.
* It it not always called from the waking context.
*/
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
TP_PROTO(struct task_struct *p),
TP_ARGS(p));
/*
* Tracepoint for waking up a new task:
*/
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
TP_PROTO(struct task_struct *p),
TP_ARGS(p));
#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
#ifdef CONFIG_SCHED_DEBUG
BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
/*
* Preemption ignores task state, therefore preempted tasks are always
* RUNNING (we will not have dequeued if state != RUNNING).
*/
if (preempt)
return TASK_STATE_MAX;
return __get_task_state(p);
}
#endif /* CREATE_TRACE_POINTS */
/*
* Tracepoint for task switches, performed by the scheduler:
*/
TRACE_EVENT(sched_switch,
TP_PROTO(bool preempt,
struct task_struct *prev,
struct task_struct *next),
TP_ARGS(preempt, prev, next),
TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )
__field( pid_t, prev_pid )
__field( int, prev_prio )
__field( long, prev_state )
__array( char, next_comm, TASK_COMM_LEN )
__field( pid_t, next_pid )
__field( int, next_prio )
),
TP_fast_assign(
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
__entry->prev_pid = prev->pid;
__entry->prev_prio = prev->prio;
__entry->prev_state = __trace_sched_switch_state(preempt, prev);
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
__entry->next_pid = next->pid;
__entry->next_prio = next->prio;
/* XXX SCHED_DEADLINE */
),
TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
__print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
{ 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" },
{ 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" },
{ 0x40, "P" }, { 0x80, "I" }) :
"R",
__entry->prev_state & TASK_STATE_MAX ? "+" : "",
__entry->next_comm, __entry->next_pid, __entry->next_prio)
);
/*
* Tracepoint for a task being migrated:
*/
TRACE_EVENT(sched_migrate_task,
TP_PROTO(struct task_struct *p, int dest_cpu),
TP_ARGS(p, dest_cpu),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, prio )
__field( int, orig_cpu )
__field( int, dest_cpu )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->prio = p->prio; /* XXX SCHED_DEADLINE */
__entry->orig_cpu = task_cpu(p);
__entry->dest_cpu = dest_cpu;
),
events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==<x>' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-15 17:50:39 +08:00
TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
__entry->comm, __entry->pid, __entry->prio,
__entry->orig_cpu, __entry->dest_cpu)
);
DECLARE_EVENT_CLASS(sched_process_template,
TP_PROTO(struct task_struct *p),
TP_ARGS(p),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, prio )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->prio = p->prio; /* XXX SCHED_DEADLINE */
),
events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==<x>' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-15 17:50:39 +08:00
TP_printk("comm=%s pid=%d prio=%d",
__entry->comm, __entry->pid, __entry->prio)
);
/*
* Tracepoint for freeing a task:
*/
DEFINE_EVENT(sched_process_template, sched_process_free,
TP_PROTO(struct task_struct *p),
TP_ARGS(p));
/*
* Tracepoint for a task exiting:
*/
DEFINE_EVENT(sched_process_template, sched_process_exit,
TP_PROTO(struct task_struct *p),
TP_ARGS(p));
/*
* Tracepoint for waiting on task to unschedule:
*/
DEFINE_EVENT(sched_process_template, sched_wait_task,
TP_PROTO(struct task_struct *p),
TP_ARGS(p));
/*
* Tracepoint for a waiting task:
*/
TRACE_EVENT(sched_process_wait,
TP_PROTO(struct pid *pid),
TP_ARGS(pid),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, prio )
),
TP_fast_assign(
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
__entry->pid = pid_nr(pid);
__entry->prio = current->prio; /* XXX SCHED_DEADLINE */
),
events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==<x>' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-15 17:50:39 +08:00
TP_printk("comm=%s pid=%d prio=%d",
__entry->comm, __entry->pid, __entry->prio)
);
/*
* Tracepoint for do_fork:
*/
TRACE_EVENT(sched_process_fork,
TP_PROTO(struct task_struct *parent, struct task_struct *child),
TP_ARGS(parent, child),
TP_STRUCT__entry(
__array( char, parent_comm, TASK_COMM_LEN )
__field( pid_t, parent_pid )
__array( char, child_comm, TASK_COMM_LEN )
__field( pid_t, child_pid )
),
TP_fast_assign(
memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
__entry->parent_pid = parent->pid;
memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
__entry->child_pid = child->pid;
),
events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==<x>' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-15 17:50:39 +08:00
TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
__entry->parent_comm, __entry->parent_pid,
__entry->child_comm, __entry->child_pid)
);
/*
* Tracepoint for exec:
*/
TRACE_EVENT(sched_process_exec,
TP_PROTO(struct task_struct *p, pid_t old_pid,
struct linux_binprm *bprm),
TP_ARGS(p, old_pid, bprm),
TP_STRUCT__entry(
__string( filename, bprm->filename )
__field( pid_t, pid )
__field( pid_t, old_pid )
),
TP_fast_assign(
__assign_str(filename, bprm->filename);
__entry->pid = p->pid;
__entry->old_pid = old_pid;
),
TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
__entry->pid, __entry->old_pid)
);
/*
* XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
* adding sched_stat support to SCHED_FIFO/RR would be welcome.
*/
DECLARE_EVENT_CLASS(sched_stat_template,
TP_PROTO(struct task_struct *tsk, u64 delay),
TP_ARGS(__perf_task(tsk), __perf_count(delay)),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( u64, delay )
),
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->delay = delay;
),
events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==<x>' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-15 17:50:39 +08:00
TP_printk("comm=%s pid=%d delay=%Lu [ns]",
__entry->comm, __entry->pid,
(unsigned long long)__entry->delay)
);
/*
* Tracepoint for accounting wait time (time the task is runnable
* but not actually running due to scheduler contention).
*/
DEFINE_EVENT(sched_stat_template, sched_stat_wait,
TP_PROTO(struct task_struct *tsk, u64 delay),
TP_ARGS(tsk, delay));
/*
* Tracepoint for accounting sleep time (time the task is not runnable,
* including iowait, see below).
*/
DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
TP_PROTO(struct task_struct *tsk, u64 delay),
TP_ARGS(tsk, delay));
/*
* Tracepoint for accounting iowait time (time the task is not runnable
* due to waiting on IO to complete).
*/
DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
TP_PROTO(struct task_struct *tsk, u64 delay),
TP_ARGS(tsk, delay));
/*
* Tracepoint for accounting blocked time (time the task is in uninterruptible).
*/
DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
TP_PROTO(struct task_struct *tsk, u64 delay),
TP_ARGS(tsk, delay));
/*
* Tracepoint for accounting runtime (time the task is executing
* on a CPU).
*/
DECLARE_EVENT_CLASS(sched_stat_runtime,
TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
TP_ARGS(tsk, __perf_count(runtime), vruntime),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( u64, runtime )
__field( u64, vruntime )
),
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->runtime = runtime;
__entry->vruntime = vruntime;
),
events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==<x>' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-10-15 17:50:39 +08:00
TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
__entry->comm, __entry->pid,
(unsigned long long)__entry->runtime,
(unsigned long long)__entry->vruntime)
);
DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
TP_ARGS(tsk, runtime, vruntime));
/*
* Tracepoint for showing priority inheritance modifying a tasks
* priority.
*/
TRACE_EVENT(sched_pi_setprio,
TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),
TP_ARGS(tsk, pi_task),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, oldprio )
__field( int, newprio )
),
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->oldprio = tsk->prio;
__entry->newprio = pi_task ? pi_task->prio : tsk->prio;
/* XXX SCHED_DEADLINE bits missing */
),
TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
__entry->comm, __entry->pid,
__entry->oldprio, __entry->newprio)
);
#ifdef CONFIG_DETECT_HUNG_TASK
TRACE_EVENT(sched_process_hang,
TP_PROTO(struct task_struct *tsk),
TP_ARGS(tsk),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
),
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
),
TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);
#endif /* CONFIG_DETECT_HUNG_TASK */
sched: add tracepoints related to NUMA task migration This patch adds three tracepoints o trace_sched_move_numa when a task is moved to a node o trace_sched_swap_numa when a task is swapped with another task o trace_sched_stick_numa when a numa-related migration fails The tracepoints allow the NUMA scheduler activity to be monitored and the following high-level metrics can be calculated o NUMA migrated stuck nr trace_sched_stick_numa o NUMA migrated idle nr trace_sched_move_numa o NUMA migrated swapped nr trace_sched_swap_numa o NUMA local swapped trace_sched_swap_numa src_nid == dst_nid (should never happen) o NUMA remote swapped trace_sched_swap_numa src_nid != dst_nid (should == NUMA migrated swapped) o NUMA group swapped trace_sched_swap_numa src_ngid == dst_ngid Maybe a small number of these are acceptable but a high number would be a major surprise. It would be even worse if bounces are frequent. o NUMA avg task migs. Average number of migrations for tasks o NUMA stddev task mig Self-explanatory o NUMA max task migs. Maximum number of migrations for a single task In general the intent of the tracepoints is to help diagnose problems where automatic NUMA balancing appears to be doing an excessive amount of useless work. [akpm@linux-foundation.org: remove semicolon-after-if, repair coding-style] Signed-off-by: Mel Gorman <mgorman@suse.de> Reviewed-by: Rik van Riel <riel@redhat.com> Cc: Alex Thorlton <athorlton@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-01-22 07:51:03 +08:00
DECLARE_EVENT_CLASS(sched_move_task_template,
TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
TP_ARGS(tsk, src_cpu, dst_cpu),
TP_STRUCT__entry(
__field( pid_t, pid )
__field( pid_t, tgid )
__field( pid_t, ngid )
__field( int, src_cpu )
__field( int, src_nid )
__field( int, dst_cpu )
__field( int, dst_nid )
),
TP_fast_assign(
__entry->pid = task_pid_nr(tsk);
__entry->tgid = task_tgid_nr(tsk);
__entry->ngid = task_numa_group_id(tsk);
__entry->src_cpu = src_cpu;
__entry->src_nid = cpu_to_node(src_cpu);
__entry->dst_cpu = dst_cpu;
__entry->dst_nid = cpu_to_node(dst_cpu);
),
TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
__entry->pid, __entry->tgid, __entry->ngid,
__entry->src_cpu, __entry->src_nid,
__entry->dst_cpu, __entry->dst_nid)
);
/*
* Tracks migration of tasks from one runqueue to another. Can be used to
* detect if automatic NUMA balancing is bouncing between nodes
*/
DEFINE_EVENT(sched_move_task_template, sched_move_numa,
TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
TP_ARGS(tsk, src_cpu, dst_cpu)
);
DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
TP_ARGS(tsk, src_cpu, dst_cpu)
);
TRACE_EVENT(sched_swap_numa,
TP_PROTO(struct task_struct *src_tsk, int src_cpu,
struct task_struct *dst_tsk, int dst_cpu),
TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
TP_STRUCT__entry(
__field( pid_t, src_pid )
__field( pid_t, src_tgid )
__field( pid_t, src_ngid )
__field( int, src_cpu )
__field( int, src_nid )
__field( pid_t, dst_pid )
__field( pid_t, dst_tgid )
__field( pid_t, dst_ngid )
__field( int, dst_cpu )
__field( int, dst_nid )
),
TP_fast_assign(
__entry->src_pid = task_pid_nr(src_tsk);
__entry->src_tgid = task_tgid_nr(src_tsk);
__entry->src_ngid = task_numa_group_id(src_tsk);
__entry->src_cpu = src_cpu;
__entry->src_nid = cpu_to_node(src_cpu);
__entry->dst_pid = task_pid_nr(dst_tsk);
__entry->dst_tgid = task_tgid_nr(dst_tsk);
__entry->dst_ngid = task_numa_group_id(dst_tsk);
__entry->dst_cpu = dst_cpu;
__entry->dst_nid = cpu_to_node(dst_cpu);
),
TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
__entry->src_pid, __entry->src_tgid, __entry->src_ngid,
__entry->src_cpu, __entry->src_nid,
__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
__entry->dst_cpu, __entry->dst_nid)
);
/*
* Tracepoint for waking a polling cpu without an IPI.
*/
TRACE_EVENT(sched_wake_idle_without_ipi,
TP_PROTO(int cpu),
TP_ARGS(cpu),
TP_STRUCT__entry(
__field( int, cpu )
),
TP_fast_assign(
__entry->cpu = cpu;
),
TP_printk("cpu=%d", __entry->cpu)
);
#endif /* _TRACE_SCHED_H */
tracing: create automated trace defines This patch lowers the number of places a developer must modify to add new tracepoints. The current method to add a new tracepoint into an existing system is to write the trace point macro in the trace header with one of the macros TRACE_EVENT, TRACE_FORMAT or DECLARE_TRACE, then they must add the same named item into the C file with the macro DEFINE_TRACE(name) and then add the trace point. This change cuts out the needing to add the DEFINE_TRACE(name). Every file that uses the tracepoint must still include the trace/<type>.h file, but the one C file must also add a define before the including of that file. #define CREATE_TRACE_POINTS #include <trace/mytrace.h> This will cause the trace/mytrace.h file to also produce the C code necessary to implement the trace point. Note, if more than one trace/<type>.h is used to create the C code it is best to list them all together. #define CREATE_TRACE_POINTS #include <trace/foo.h> #include <trace/bar.h> #include <trace/fido.h> Thanks to Mathieu Desnoyers and Christoph Hellwig for coming up with the cleaner solution of the define above the includes over my first design to have the C code include a "special" header. This patch converts sched, irq and lockdep and skb to use this new method. Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Neil Horman <nhorman@tuxdriver.com> Cc: Zhao Lei <zhaolei@cn.fujitsu.com> Cc: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2009-04-10 21:36:00 +08:00
/* This part must be outside protection */
#include <trace/define_trace.h>