From e543ad76914abec1acf6631604a4154cd7a2ca6b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 18:20:36 -0500 Subject: [PATCH 1/8] tracing: add cpu_file intialization for ftrace_dump Impact: fix to ftrace_dump output corruption The commit: b04cc6b1f6398b0e0b60d37e27ce51b4899672ec tracing/core: introduce per cpu tracing files added a new field to the iterator called cpu_file. This was a handle to differentiate between the per cpu trace output files and the all cpu "trace" file. The all cpu "trace" file required setting this to TRACE_PIPE_ALL_CPU. The problem is that the ftrace_dump sets up its own iterator but was not updated to handle this change. The result was only CPU 0 printing out on crash and a lot of "<0>"'s also being printed. Reported-by: Thomas Gleixner Tested-by: Darren Hart Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c8abbb0c8397..ab5cbcae43a1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3918,8 +3918,10 @@ void ftrace_dump(void) printk(KERN_TRACE "Dumping ftrace buffer:\n"); + /* Simulate the iterator */ iter.tr = &global_trace; iter.trace = current_trace; + iter.cpu_file = TRACE_PIPE_ALL_CPU; /* * We need to stop all tracing on all CPUS to read the From 4f3640f8a358f2183a8c966f299eeb55ca523e06 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 3 Mar 2009 23:52:42 -0500 Subject: [PATCH 2/8] ring-buffer: fix timestamp in partial ring_buffer_page_read If a partial ring_buffer_page_read happens, then some of the incremental timestamps may be lost. This patch writes the recent timestamp into the page that is passed back to the caller. A partial ring_buffer_page_read is where the full page would not be written back to the user, and instead, just part of the page is copied to the user. A full page would be a page swap with the ring buffer and the timestamps would be correct. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f2a163db52f9..f7473645b9c6 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2461,6 +2461,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, unsigned long flags; unsigned int commit; unsigned int read; + u64 save_timestamp; int ret = -1; /* @@ -2515,6 +2516,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer, if (len < size) goto out; + /* save the current timestamp, since the user will need it */ + save_timestamp = cpu_buffer->read_stamp; + /* Need to copy one event at a time */ do { memcpy(bpage->data + pos, rpage->data + rpos, size); @@ -2531,7 +2535,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, /* update bpage */ local_set(&bpage->commit, pos); - bpage->time_stamp = rpage->time_stamp; + bpage->time_stamp = save_timestamp; /* we copied everything to the beginning */ read = 0; From 2dc5d12b1f43134e9bc5037f69f4739cfdfab93e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 19:10:05 -0500 Subject: [PATCH 3/8] tracing: do not return EFAULT if read copied anything Impact: fix trace read to conform to standards Andrew Morton, Theodore Tso and H. Peter Anvin brought to my attention that a userspace read should not return -EFAULT if it succeeded in copying anything. It should only return -EFAULT if it failed to copy at all. This patch modifies the check of copy_from_user and updates the return code appropriately. I also used H. Peter Anvin's short cut rule to just test ret == count. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ab5cbcae43a1..57155dc53530 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -346,6 +346,9 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) int len; int ret; + if (!cnt) + return 0; + if (s->len <= s->readpos) return -EBUSY; @@ -353,9 +356,11 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) if (cnt > len) cnt = len; ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); - if (ret) + if (ret == cnt) return -EFAULT; + cnt -= ret; + s->readpos += len; return cnt; } @@ -3049,6 +3054,9 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, ssize_t ret; size_t size; + if (!count) + return 0; + /* Do we have previous read data to read? */ if (info->read < PAGE_SIZE) goto read; @@ -3073,8 +3081,10 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, size = count; ret = copy_to_user(ubuf, info->spare + info->read, size); - if (ret) + if (ret == size) return -EFAULT; + size -= ret; + *ppos += size; info->read += size; From e74da5235cec6cb71eb338c987f876ecc793138b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 20:31:11 -0500 Subject: [PATCH 4/8] tracing: fix seq read from trace files The buffer used by trace_seq was updated incorrectly. Instead of consuming what was actually read, it consumed the rest of the buffer on reads. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 57155dc53530..2e53e6f09440 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -361,7 +361,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) cnt -= ret; - s->readpos += len; + s->readpos += cnt; return cnt; } @@ -380,7 +380,7 @@ ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) if (!ret) return -EFAULT; - s->readpos += len; + s->readpos += cnt; return cnt; } From c032ef64d680717e4e8ce3da65da6419a35f8a2c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 20:34:24 -0500 Subject: [PATCH 5/8] tracing: add latency output format option With the removal of the latency_trace file, we lost the ability to see some of the finer details in a trace. Like the state of interrupts enabled, the preempt count, need resched, and if we are in an interrupt handler, softirq handler or not. This patch simply creates an option to bring back the old format. This also removes the warning about an unused variable that held the latency_trace file operations. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 24 ++---------------------- kernel/trace/trace.h | 3 ++- 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2e53e6f09440..55fcbb567950 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -299,6 +299,7 @@ static const char *trace_options[] = { "sym-userobj", "printk-msg-only", "context-info", + "latency-format", NULL }; @@ -1829,26 +1830,12 @@ static int tracing_open(struct inode *inode, struct file *file) iter = __tracing_open(inode, file); if (IS_ERR(iter)) ret = PTR_ERR(iter); - - return ret; -} - -static int tracing_lt_open(struct inode *inode, struct file *file) -{ - struct trace_iterator *iter; - int ret = 0; - - iter = __tracing_open(inode, file); - - if (IS_ERR(iter)) - ret = PTR_ERR(iter); - else + else if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; return ret; } - static void * t_next(struct seq_file *m, void *v, loff_t *pos) { @@ -1927,13 +1914,6 @@ static struct file_operations tracing_fops = { .release = tracing_release, }; -static struct file_operations tracing_lt_fops = { - .open = tracing_lt_open, - .read = seq_read, - .llseek = seq_lseek, - .release = tracing_release, -}; - static struct file_operations show_traces_fops = { .open = show_traces_open, .read = seq_read, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 561bb5c5d988..12cd119cca32 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -651,7 +651,8 @@ enum trace_iterator_flags { TRACE_ITER_USERSTACKTRACE = 0x4000, TRACE_ITER_SYM_USEROBJ = 0x8000, TRACE_ITER_PRINTK_MSGONLY = 0x10000, - TRACE_ITER_CONTEXT_INFO = 0x20000 /* Print pid/cpu/time */ + TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ + TRACE_ITER_LATENCY_FMT = 0x40000, }; /* From 5fd73f862468280d4cbb5ba4321502f911f9f89a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 21:42:04 -0500 Subject: [PATCH 6/8] tracing: remove extra latency_trace method from trace structure Impact: clean up The trace and latency_trace function pointers are identical for every tracer but the function tracer. The differences in the function tracer are trivial (latency output puts paranthesis around parent). This patch removes the latency_trace pointer and all prints will now just use the trace output function pointer. Signed-off-by: Steven Rostedt --- kernel/trace/blktrace.c | 1 - kernel/trace/trace.c | 2 +- kernel/trace/trace_branch.c | 1 - kernel/trace/trace_output.c | 32 -------------------------------- kernel/trace/trace_output.h | 1 - 5 files changed, 1 insertion(+), 36 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e82cb9e930cc..e39679a72a3b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1231,7 +1231,6 @@ static struct tracer blk_tracer __read_mostly = { static struct trace_event trace_blk_event = { .type = TRACE_BLK, .trace = blk_trace_event_print, - .latency_trace = blk_trace_event_print, .binary = blk_trace_event_print_binary, }; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 55fcbb567950..21b89ecb8480 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1485,7 +1485,7 @@ static enum print_line_t print_lat_fmt(struct trace_iterator *iter) } if (event) - return event->latency_trace(iter, sym_flags); + return event->trace(iter, sym_flags); if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) goto partial; diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index c2e68d440c4d..aaa0755268b9 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -159,7 +159,6 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter, static struct trace_event trace_branch_event = { .type = TRACE_BRANCH, .trace = trace_branch_print, - .latency_trace = trace_branch_print, }; static struct tracer branch_trace __read_mostly = diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 9fc815031b09..306fef84c503 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -437,8 +437,6 @@ int register_ftrace_event(struct trace_event *event) if (event->trace == NULL) event->trace = trace_nop_print; - if (event->latency_trace == NULL) - event->latency_trace = trace_nop_print; if (event->raw == NULL) event->raw = trace_nop_print; if (event->hex == NULL) @@ -480,29 +478,6 @@ enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags) } /* TRACE_FN */ -static enum print_line_t trace_fn_latency(struct trace_iterator *iter, - int flags) -{ - struct ftrace_entry *field; - struct trace_seq *s = &iter->seq; - - trace_assign_type(field, iter->ent); - - if (!seq_print_ip_sym(s, field->ip, flags)) - goto partial; - if (!trace_seq_puts(s, " (")) - goto partial; - if (!seq_print_ip_sym(s, field->parent_ip, flags)) - goto partial; - if (!trace_seq_puts(s, ")\n")) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; -} - static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags) { struct ftrace_entry *field; @@ -573,7 +548,6 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) static struct trace_event trace_fn_event = { .type = TRACE_FN, .trace = trace_fn_trace, - .latency_trace = trace_fn_latency, .raw = trace_fn_raw, .hex = trace_fn_hex, .binary = trace_fn_bin, @@ -705,7 +679,6 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, static struct trace_event trace_ctx_event = { .type = TRACE_CTX, .trace = trace_ctx_print, - .latency_trace = trace_ctx_print, .raw = trace_ctx_raw, .hex = trace_ctx_hex, .binary = trace_ctxwake_bin, @@ -714,7 +687,6 @@ static struct trace_event trace_ctx_event = { static struct trace_event trace_wake_event = { .type = TRACE_WAKE, .trace = trace_wake_print, - .latency_trace = trace_wake_print, .raw = trace_wake_raw, .hex = trace_wake_hex, .binary = trace_ctxwake_bin, @@ -770,7 +742,6 @@ static enum print_line_t trace_special_bin(struct trace_iterator *iter, static struct trace_event trace_special_event = { .type = TRACE_SPECIAL, .trace = trace_special_print, - .latency_trace = trace_special_print, .raw = trace_special_print, .hex = trace_special_hex, .binary = trace_special_bin, @@ -808,7 +779,6 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, static struct trace_event trace_stack_event = { .type = TRACE_STACK, .trace = trace_stack_print, - .latency_trace = trace_stack_print, .raw = trace_special_print, .hex = trace_special_hex, .binary = trace_special_bin, @@ -838,7 +808,6 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, static struct trace_event trace_user_stack_event = { .type = TRACE_USER_STACK, .trace = trace_user_stack_print, - .latency_trace = trace_user_stack_print, .raw = trace_special_print, .hex = trace_special_hex, .binary = trace_special_bin, @@ -883,7 +852,6 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) static struct trace_event trace_print_event = { .type = TRACE_PRINT, .trace = trace_print_print, - .latency_trace = trace_print_print, .raw = trace_print_raw, }; diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 551a25a72217..8a34d688ed63 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -10,7 +10,6 @@ struct trace_event { struct hlist_node node; int type; trace_print_func trace; - trace_print_func latency_trace; trace_print_func raw; trace_print_func hex; trace_print_func binary; From 27d48be84477d2f0a2e2ac3738a3971dece631d5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 21:57:29 -0500 Subject: [PATCH 7/8] tracing: consolidate print_lat_fmt and print_trace_fmt Impact: clean up Both print_lat_fmt and print_trace_fmt do pretty much the same thing except for one different function call. This patch consolidates the two functions and adds an if statement to perform the difference. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 39 +++++++-------------------------------- 1 file changed, 7 insertions(+), 32 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 21b89ecb8480..d1ef43999d9e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1468,33 +1468,6 @@ static void test_cpu_buff_start(struct trace_iterator *iter) trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); } -static enum print_line_t print_lat_fmt(struct trace_iterator *iter) -{ - struct trace_seq *s = &iter->seq; - unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); - struct trace_event *event; - struct trace_entry *entry = iter->ent; - - test_cpu_buff_start(iter); - - event = ftrace_find_event(entry->type); - - if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - if (!trace_print_lat_context(iter)) - goto partial; - } - - if (event) - return event->trace(iter, sym_flags); - - if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) - goto partial; - - return TRACE_TYPE_HANDLED; -partial: - return TRACE_TYPE_PARTIAL_LINE; -} - static enum print_line_t print_trace_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1509,8 +1482,13 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) event = ftrace_find_event(entry->type); if (trace_flags & TRACE_ITER_CONTEXT_INFO) { - if (!trace_print_context(iter)) - goto partial; + if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + if (!trace_print_lat_context(iter)) + goto partial; + } else { + if (!trace_print_context(iter)) + goto partial; + } } if (event) @@ -1652,9 +1630,6 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) if (trace_flags & TRACE_ITER_RAW) return print_raw_fmt(iter); - if (iter->iter_flags & TRACE_FILE_LAT_FMT) - return print_lat_fmt(iter); - return print_trace_fmt(iter); } From e9d25fe6eaa2c720bb3ea661b660e58d54fa38bf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 22:15:30 -0500 Subject: [PATCH 8/8] tracing: have latency tracers set the latency format The latency tracers (irqsoff, preemptoff, preemptirqsoff, and wakeup) are pretty useless with the default output format. This patch makes them automatically enable the latency format when they are selected. They also record the state of the latency option, and if it was not enabled when selected, they disable it on reset. Signed-off-by: Steven Rostedt --- kernel/trace/trace_irqsoff.c | 8 ++++++++ kernel/trace/trace_sched_wakeup.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 9e5ebd844158..b923d13e2fad 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -32,6 +32,8 @@ enum { static int trace_type __read_mostly; +static int save_lat_flag; + #ifdef CONFIG_PREEMPT_TRACER static inline int preempt_trace(void) @@ -370,6 +372,9 @@ static void stop_irqsoff_tracer(struct trace_array *tr) static void __irqsoff_tracer_init(struct trace_array *tr) { + save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; + trace_flags |= TRACE_ITER_LATENCY_FMT; + tracing_max_latency = 0; irqsoff_trace = tr; /* make sure that the tracer is visible */ @@ -380,6 +385,9 @@ static void __irqsoff_tracer_init(struct trace_array *tr) static void irqsoff_tracer_reset(struct trace_array *tr) { stop_irqsoff_tracer(tr); + + if (!save_lat_flag) + trace_flags &= ~TRACE_ITER_LATENCY_FMT; } static void irqsoff_tracer_start(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index db55f7aaa640..3c5ad6b2ec84 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -32,6 +32,8 @@ static raw_spinlock_t wakeup_lock = static void __wakeup_reset(struct trace_array *tr); +static int save_lat_flag; + #ifdef CONFIG_FUNCTION_TRACER /* * irqsoff uses its own tracer function to keep the overhead down: @@ -324,6 +326,9 @@ static void stop_wakeup_tracer(struct trace_array *tr) static int __wakeup_tracer_init(struct trace_array *tr) { + save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; + trace_flags |= TRACE_ITER_LATENCY_FMT; + tracing_max_latency = 0; wakeup_trace = tr; start_wakeup_tracer(tr); @@ -347,6 +352,9 @@ static void wakeup_tracer_reset(struct trace_array *tr) stop_wakeup_tracer(tr); /* make sure we put back any tasks we are tracing */ wakeup_reset(tr); + + if (!save_lat_flag) + trace_flags &= ~TRACE_ITER_LATENCY_FMT; } static void wakeup_tracer_start(struct trace_array *tr)