From 5c5317de147e9b38ea9c4cbdc2d15bed7648d036 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Mon, 19 Jan 2009 10:26:53 +0100 Subject: [PATCH 1/7] x86, ftrace, hw-branch-tracer: support hotplug cpus Support hotplug cpus. Reported-by: Andi Kleen Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_hw_branches.c | 125 ++++++++++++++++++++++++++----- 1 file changed, 108 insertions(+), 17 deletions(-) diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index df21c1e72b95..398195397c75 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -1,7 +1,8 @@ /* * h/w branch tracer for x86 based on bts * - * Copyright (C) 2008 Markus Metzger + * Copyright (C) 2008-2009 Intel Corporation. + * Markus Metzger , 2008-2009 * */ @@ -10,6 +11,9 @@ #include #include #include +#include +#include +#include #include @@ -19,13 +23,31 @@ #define SIZEOF_BTS (1 << 13) +/* The tracer mutex protects the below per-cpu tracer array. + It needs to be held to: + - start tracing on all cpus + - stop tracing on all cpus + - start tracing on a single hotplug cpu + - stop tracing on a single hotplug cpu + - read the trace from all cpus + - read the trace from a single cpu +*/ +static DEFINE_MUTEX(bts_tracer_mutex); static DEFINE_PER_CPU(struct bts_tracer *, tracer); static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); #define this_tracer per_cpu(tracer, smp_processor_id()) #define this_buffer per_cpu(buffer, smp_processor_id()) +static int __read_mostly trace_hw_branches_enabled; + +/* + * Start tracing on the current cpu. + * The argument is ignored. + * + * pre: bts_tracer_mutex must be locked. 
+ */ static void bts_trace_start_cpu(void *arg) { if (this_tracer) @@ -43,14 +65,20 @@ static void bts_trace_start_cpu(void *arg) static void bts_trace_start(struct trace_array *tr) { - int cpu; + mutex_lock(&bts_tracer_mutex); - tracing_reset_online_cpus(tr); + on_each_cpu(bts_trace_start_cpu, NULL, 1); + trace_hw_branches_enabled = 1; - for_each_cpu(cpu, cpu_possible_mask) - smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); + mutex_unlock(&bts_tracer_mutex); } +/* + * Stop tracing on the current cpu. + * The argument is ignored. + * + * pre: bts_tracer_mutex must be locked. + */ static void bts_trace_stop_cpu(void *arg) { if (this_tracer) { @@ -61,20 +89,58 @@ static void bts_trace_stop_cpu(void *arg) static void bts_trace_stop(struct trace_array *tr) { - int cpu; + mutex_lock(&bts_tracer_mutex); - for_each_cpu(cpu, cpu_possible_mask) - smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); + trace_hw_branches_enabled = 0; + on_each_cpu(bts_trace_stop_cpu, NULL, 1); + + mutex_unlock(&bts_tracer_mutex); } +static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + mutex_lock(&bts_tracer_mutex); + + if (!trace_hw_branches_enabled) + goto out; + + switch (action) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: + smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); + break; + case CPU_DOWN_PREPARE: + smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); + break; + } + + out: + mutex_unlock(&bts_tracer_mutex); + return NOTIFY_DONE; +} + +static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { + .notifier_call = bts_hotcpu_handler +}; + static int bts_trace_init(struct trace_array *tr) { + register_hotcpu_notifier(&bts_hotcpu_notifier); tracing_reset_online_cpus(tr); bts_trace_start(tr); return 0; } +static void bts_trace_reset(struct trace_array *tr) +{ + bts_trace_stop(tr); + unregister_hotcpu_notifier(&bts_hotcpu_notifier); +} + static 
void bts_trace_print_header(struct seq_file *m) { seq_puts(m, @@ -108,18 +174,34 @@ void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) { struct ring_buffer_event *event; struct hw_branch_entry *entry; - unsigned long irq; + unsigned long irq1, irq2; + int cpu; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq); - if (!event) + if (unlikely(!tr)) return; + + if (unlikely(!trace_hw_branches_enabled)) + return; + + local_irq_save(irq1); + cpu = raw_smp_processor_id(); + if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) + goto out; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq2); + if (!event) + goto out; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, from); entry->ent.type = TRACE_HW_BRANCHES; - entry->ent.cpu = smp_processor_id(); + entry->ent.cpu = cpu; entry->from = from; entry->to = to; - ring_buffer_unlock_commit(tr->buffer, event, irq); + ring_buffer_unlock_commit(tr->buffer, event, irq2); + + out: + atomic_dec(&tr->data[cpu]->disabled); + local_irq_restore(irq1); } static void trace_bts_at(struct trace_array *tr, @@ -143,6 +225,11 @@ static void trace_bts_at(struct trace_array *tr, } } +/* + * Collect the trace on the current cpu and write it into the ftrace buffer. 
+ * + * pre: bts_tracer_mutex must be locked + */ static void trace_bts_cpu(void *arg) { struct trace_array *tr = (struct trace_array *) arg; @@ -152,6 +239,9 @@ static void trace_bts_cpu(void *arg) if (!this_tracer) return; + if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled))) + return; + ds_suspend_bts(this_tracer); trace = ds_read_bts(this_tracer); if (!trace) @@ -171,17 +261,18 @@ static void trace_bts_cpu(void *arg) static void trace_bts_prepare(struct trace_iterator *iter) { - int cpu; + mutex_lock(&bts_tracer_mutex); - for_each_cpu(cpu, cpu_possible_mask) - smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1); + on_each_cpu(trace_bts_cpu, iter->tr, 1); + + mutex_unlock(&bts_tracer_mutex); } struct tracer bts_tracer __read_mostly = { .name = "hw-branch-tracer", .init = bts_trace_init, - .reset = bts_trace_stop, + .reset = bts_trace_reset, .print_header = bts_trace_print_header, .print_line = bts_trace_print_line, .start = bts_trace_start, From b1818748b0cf9427e48acf9713295e829a0d715f Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Mon, 19 Jan 2009 10:31:01 +0100 Subject: [PATCH 2/7] x86, ftrace, hw-branch-tracer: dump trace on oops Dump the branch trace on an oops (based on ftrace_dump_on_oops). 
Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 6 ++++++ include/linux/ftrace.h | 13 +++++++++++++ kernel/trace/trace.h | 1 - kernel/trace/trace_hw_branches.c | 29 ++++++++++++++++++++++------- 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6b1f6f6f8661..077c9ea655fc 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -195,6 +196,11 @@ unsigned __kprobes long oops_begin(void) int cpu; unsigned long flags; + /* notify the hw-branch tracer so it may disable tracing and + add the last trace to the trace buffer - + the earlier this happens, the more useful the trace. */ + trace_hw_branch_oops(); + oops_enter(); /* racy, but better than risking deadlock. */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 054721487574..9f7880d87c39 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -496,4 +496,17 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) #endif /* CONFIG_TRACING */ + +#ifdef CONFIG_HW_BRANCH_TRACER + +void trace_hw_branch(u64 from, u64 to); +void trace_hw_branch_oops(void); + +#else /* CONFIG_HW_BRANCH_TRACER */ + +static inline void trace_hw_branch(u64 from, u64 to) {} +static inline void trace_hw_branch_oops(void) {} + +#endif /* CONFIG_HW_BRANCH_TRACER */ + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54b72781e920..b96037d970df 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -438,7 +438,6 @@ void trace_function(struct trace_array *tr, void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); -void trace_hw_branch(struct trace_array *tr, u64 from, u64 to); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); diff --git 
a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 398195397c75..e56df2c7d679 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -40,6 +40,7 @@ static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); #define this_buffer per_cpu(buffer, smp_processor_id()) static int __read_mostly trace_hw_branches_enabled; +static struct trace_array *hw_branch_trace __read_mostly; /* @@ -128,6 +129,8 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { static int bts_trace_init(struct trace_array *tr) { + hw_branch_trace = tr; + register_hotcpu_notifier(&bts_hotcpu_notifier); tracing_reset_online_cpus(tr); bts_trace_start(tr); @@ -170,8 +173,9 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) return TRACE_TYPE_UNHANDLED; } -void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) +void trace_hw_branch(u64 from, u64 to) { + struct trace_array *tr = hw_branch_trace; struct ring_buffer_event *event; struct hw_branch_entry *entry; unsigned long irq1, irq2; @@ -204,8 +208,7 @@ void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) local_irq_restore(irq1); } -static void trace_bts_at(struct trace_array *tr, - const struct bts_trace *trace, void *at) +static void trace_bts_at(const struct bts_trace *trace, void *at) { struct bts_struct bts; int err = 0; @@ -220,7 +223,7 @@ static void trace_bts_at(struct trace_array *tr, switch (bts.qualifier) { case BTS_BRANCH: - trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to); + trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to); break; } } @@ -236,12 +239,15 @@ static void trace_bts_cpu(void *arg) const struct bts_trace *trace; unsigned char *at; - if (!this_tracer) + if (unlikely(!tr)) return; if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled))) return; + if (unlikely(!this_tracer)) + return; + ds_suspend_bts(this_tracer); trace = ds_read_bts(this_tracer); if (!trace) @@ -249,11 +255,11 @@ 
static void trace_bts_cpu(void *arg) for (at = trace->ds.top; (void *)at < trace->ds.end; at += trace->ds.size) - trace_bts_at(tr, trace, at); + trace_bts_at(trace, at); for (at = trace->ds.begin; (void *)at < trace->ds.top; at += trace->ds.size) - trace_bts_at(tr, trace, at); + trace_bts_at(trace, at); out: ds_resume_bts(this_tracer); @@ -268,6 +274,15 @@ static void trace_bts_prepare(struct trace_iterator *iter) mutex_unlock(&bts_tracer_mutex); } +void trace_hw_branch_oops(void) +{ + mutex_lock(&bts_tracer_mutex); + + trace_bts_cpu(hw_branch_trace); + + mutex_unlock(&bts_tracer_mutex); +} + struct tracer bts_tracer __read_mostly = { .name = "hw-branch-tracer", From e23b8ad83430a6fdfbdbfac365f5b0312dd57f10 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Mon, 19 Jan 2009 10:33:31 +0100 Subject: [PATCH 3/7] x86, ftrace, hw-branch-tracer: reset trace buffer on close Reset the ftrace buffer on close. Since we use cyclic buffers, the trace is not contiguous, anyway. Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_hw_branches.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index e56df2c7d679..372b47ac3154 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -274,6 +274,11 @@ static void trace_bts_prepare(struct trace_iterator *iter) mutex_unlock(&bts_tracer_mutex); } +static void trace_bts_close(struct trace_iterator *iter) +{ + tracing_reset_online_cpus(iter->tr); +} + void trace_hw_branch_oops(void) { mutex_lock(&bts_tracer_mutex); @@ -292,7 +297,8 @@ struct tracer bts_tracer __read_mostly = .print_line = bts_trace_print_line, .start = bts_trace_start, .stop = bts_trace_stop, - .open = trace_bts_prepare + .open = trace_bts_prepare, + .close = trace_bts_close }; __init static int init_bts_trace(void) From 11edda06289d412d13ff7c672bd72e043f637e74 Mon Sep 17 00:00:00 2001 From: 
Markus Metzger Date: Mon, 19 Jan 2009 10:29:16 +0100 Subject: [PATCH 4/7] x86, ftrace, hw-branch-tracer: change trace format Change the hw-branch-tracer format to be more readable. Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_hw_branches.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 372b47ac3154..fff3545fc866 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -146,10 +146,7 @@ static void bts_trace_reset(struct trace_array *tr) static void bts_trace_print_header(struct seq_file *m) { - seq_puts(m, - "# CPU# FROM TO FUNCTION\n"); - seq_puts(m, - "# | | | |\n"); + seq_puts(m, "# CPU# TO <- FROM\n"); } static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) @@ -157,15 +154,15 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) struct trace_entry *entry = iter->ent; struct trace_seq *seq = &iter->seq; struct hw_branch_entry *it; + unsigned long symflags = TRACE_ITER_SYM_OFFSET; trace_assign_type(it, entry); if (entry->type == TRACE_HW_BRANCHES) { if (trace_seq_printf(seq, "%4d ", entry->cpu) && - trace_seq_printf(seq, "0x%016llx -> 0x%016llx ", - it->from, it->to) && - (!it->from || - seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) && + seq_print_ip_sym(seq, it->to, symflags) && + trace_seq_printf(seq, "\t <- ") && + seq_print_ip_sym(seq, it->from, symflags) && trace_seq_printf(seq, "\n")) return TRACE_TYPE_HANDLED; return TRACE_TYPE_PARTIAL_LINE;; From e2ea5399bb4fb7aaafb08f846db453f4eec55160 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Mon, 19 Jan 2009 10:35:58 +0100 Subject: [PATCH 5/7] x86, ftrace, hw-branch-tracer: documentation Document the hw-branch-tracer in the ftrace documentation. 
Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 74 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 803b1318b13d..758fb42a1b68 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -165,6 +165,8 @@ Here is the list of current tracers that may be configured. nop - This is not a tracer. To remove all tracers from tracing simply echo "nop" into current_tracer. + hw-branch-tracer - traces branches on all cpus in a circular buffer. + Examples of using the tracer ---------------------------- @@ -1152,6 +1154,78 @@ int main (int argc, char **argv) return 0; } + +hw-branch-tracer (x86 only) +--------------------------- + +This tracer uses the x86 last branch tracing hardware feature to +collect a branch trace on all cpus with relatively low overhead. + +The tracer uses a fixed-size circular buffer per cpu and only +traces ring 0 branches. The trace file dumps that buffer in the +following format: + +# tracer: hw-branch-tracer +# +# CPU# TO <- FROM + 0 scheduler_tick+0xb5/0x1bf <- task_tick_idle+0x5/0x6 + 2 run_posix_cpu_timers+0x2b/0x72a <- run_posix_cpu_timers+0x25/0x72a + 0 scheduler_tick+0x139/0x1bf <- scheduler_tick+0xed/0x1bf + 0 scheduler_tick+0x17c/0x1bf <- scheduler_tick+0x148/0x1bf + 2 run_posix_cpu_timers+0x9e/0x72a <- run_posix_cpu_timers+0x5e/0x72a + 0 scheduler_tick+0x1b6/0x1bf <- scheduler_tick+0x1aa/0x1bf + + +The tracer may be used to dump the trace for the oops'ing cpu on a +kernel oops into the system log. To enable this, ftrace_dump_on_oops +must be set. To set ftrace_dump_on_oops, one can either use the sysctl +command or set it via the proc system interface. 
+ + sysctl kernel.ftrace_dump_on_oops=1 + +or + + echo 1 > /proc/sys/kernel/ftrace_dump_on_oops + + +Here's an example of such a dump after a null pointer dereference in a +kernel module: + +[57848.105921] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 +[57848.106019] IP: [] open+0x6/0x14 [oops] +[57848.106019] PGD 2354e9067 PUD 2375e7067 PMD 0 +[57848.106019] Oops: 0002 [#1] SMP +[57848.106019] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:20:05.0/local_cpus +[57848.106019] Dumping ftrace buffer: +[57848.106019] --------------------------------- +[...] +[57848.106019] 0 chrdev_open+0xe6/0x165 <- cdev_put+0x23/0x24 +[57848.106019] 0 chrdev_open+0x117/0x165 <- chrdev_open+0xfa/0x165 +[57848.106019] 0 chrdev_open+0x120/0x165 <- chrdev_open+0x11c/0x165 +[57848.106019] 0 chrdev_open+0x134/0x165 <- chrdev_open+0x12b/0x165 +[57848.106019] 0 open+0x0/0x14 [oops] <- chrdev_open+0x144/0x165 +[57848.106019] 0 page_fault+0x0/0x30 <- open+0x6/0x14 [oops] +[57848.106019] 0 error_entry+0x0/0x5b <- page_fault+0x4/0x30 +[57848.106019] 0 error_kernelspace+0x0/0x31 <- error_entry+0x59/0x5b +[57848.106019] 0 error_sti+0x0/0x1 <- error_kernelspace+0x2d/0x31 +[57848.106019] 0 page_fault+0x9/0x30 <- error_sti+0x0/0x1 +[57848.106019] 0 do_page_fault+0x0/0x881 <- page_fault+0x1a/0x30 +[...] +[57848.106019] 0 do_page_fault+0x66b/0x881 <- is_prefetch+0x1ee/0x1f2 +[57848.106019] 0 do_page_fault+0x6e0/0x881 <- do_page_fault+0x67a/0x881 +[57848.106019] 0 oops_begin+0x0/0x96 <- do_page_fault+0x6e0/0x881 +[57848.106019] 0 trace_hw_branch_oops+0x0/0x2d <- oops_begin+0x9/0x96 +[...] +[57848.106019] 0 ds_suspend_bts+0x2a/0xe3 <- ds_suspend_bts+0x1a/0xe3 +[57848.106019] --------------------------------- +[57848.106019] CPU 0 +[57848.106019] Modules linked in: oops +[57848.106019] Pid: 5542, comm: cat Tainted: G W 2.6.28 #23 +[57848.106019] RIP: 0010:[] [] open+0x6/0x14 [oops] +[57848.106019] RSP: 0018:ffff880235457d48 EFLAGS: 00010246 +[...] 
+ + dynamic ftrace -------------- From ce5e5540c0e839781e7cd134517d5d2e9e819636 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Mon, 19 Jan 2009 10:38:35 +0100 Subject: [PATCH 6/7] x86, ds, bts: cleanup DS configuration Cleanup the cpuid check for DS configuration. Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index da91701a2348..169a120587be 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -15,8 +15,8 @@ * - buffer allocation (memory accounting) * * - * Copyright (C) 2007-2008 Intel Corporation. - * Markus Metzger , 2007-2008 + * Copyright (C) 2007-2009 Intel Corporation. + * Markus Metzger , 2007-2009 */ @@ -890,7 +890,7 @@ int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) } static const struct ds_configuration ds_cfg_netburst = { - .name = "netburst", + .name = "Netburst", .ctl[dsf_bts] = (1 << 2) | (1 << 3), .ctl[dsf_bts_kernel] = (1 << 5), .ctl[dsf_bts_user] = (1 << 6), @@ -904,7 +904,7 @@ static const struct ds_configuration ds_cfg_netburst = { #endif }; static const struct ds_configuration ds_cfg_pentium_m = { - .name = "pentium m", + .name = "Pentium M", .ctl[dsf_bts] = (1 << 6) | (1 << 7), .sizeof_field = sizeof(long), @@ -915,8 +915,8 @@ static const struct ds_configuration ds_cfg_pentium_m = { .sizeof_rec[ds_pebs] = sizeof(long) * 18, #endif }; -static const struct ds_configuration ds_cfg_core2 = { - .name = "core 2", +static const struct ds_configuration ds_cfg_core2_atom = { + .name = "Core 2/Atom", .ctl[dsf_bts] = (1 << 6) | (1 << 7), .ctl[dsf_bts_kernel] = (1 << 9), .ctl[dsf_bts_user] = (1 << 10), @@ -949,19 +949,22 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { - case 0 ... 
0xC: - /* sorry, don't know about them */ - break; - case 0xD: - case 0xE: /* Pentium M */ + case 0x9: + case 0xd: /* Pentium M */ ds_configure(&ds_cfg_pentium_m); break; - default: /* Core2, Atom, ... */ - ds_configure(&ds_cfg_core2); + case 0xf: + case 0x17: /* Core2 */ + case 0x1c: /* Atom */ + ds_configure(&ds_cfg_core2_atom); + break; + case 0x1a: /* i7 */ + default: + /* sorry, don't know about them */ break; } break; - case 0xF: + case 0xf: switch (c->x86_model) { case 0x0: case 0x1: From 3690b5e6fd9daa030039ae9bda69044228bd476d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 16 Jan 2009 16:32:25 +0800 Subject: [PATCH 7/7] trace_workqueue: use percpu data for workqueue stat Impact: use percpu data instead of a global structure Use: static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat); instead of allocating a global structure. percpu data also works well on NUMA. Signed-off-by: Lai Jiangshan Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_workqueue.c | 64 +++++++++++++++------------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index f8118d39ca9b..4664990fe9c5 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -8,6 +8,7 @@ #include #include +#include #include "trace_stat.h" #include "trace.h" @@ -37,7 +38,8 @@ struct workqueue_global_stats { /* Don't need a global lock because allocated before the workqueues, and * never freed. 
*/ -static struct workqueue_global_stats *all_workqueue_stat; +static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat); +#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu)) /* Insertion of a work */ static void @@ -48,8 +50,8 @@ probe_workqueue_insertion(struct task_struct *wq_thread, struct cpu_workqueue_stats *node, *next; unsigned long flags; - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list, + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, list) { if (node->pid == wq_thread->pid) { atomic_inc(&node->inserted); @@ -58,7 +60,7 @@ probe_workqueue_insertion(struct task_struct *wq_thread, } pr_debug("trace_workqueue: entry not found\n"); found: - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); } /* Execution of a work */ @@ -70,8 +72,8 @@ probe_workqueue_execution(struct task_struct *wq_thread, struct cpu_workqueue_stats *node, *next; unsigned long flags; - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list, + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, list) { if (node->pid == wq_thread->pid) { node->executed++; @@ -80,7 +82,7 @@ probe_workqueue_execution(struct task_struct *wq_thread, } pr_debug("trace_workqueue: entry not found\n"); found: - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); } /* Creation of a cpu workqueue thread */ @@ -104,11 +106,11 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) cws->pid = wq_thread->pid; - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - if (list_empty(&all_workqueue_stat[cpu].list)) + 
spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + if (list_empty(&workqueue_cpu_stat(cpu)->list)) cws->first_entry = true; - list_add_tail(&cws->list, &all_workqueue_stat[cpu].list); - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); } /* Destruction of a cpu workqueue thread */ @@ -119,8 +121,8 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread) struct cpu_workqueue_stats *node, *next; unsigned long flags; - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list, + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, list) { if (node->pid == wq_thread->pid) { list_del(&node->list); @@ -131,7 +133,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread) pr_debug("trace_workqueue: don't find workqueue to destroy\n"); found: - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); } @@ -141,13 +143,13 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu) struct cpu_workqueue_stats *ret = NULL; - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); - if (!list_empty(&all_workqueue_stat[cpu].list)) - ret = list_entry(all_workqueue_stat[cpu].list.next, + if (!list_empty(&workqueue_cpu_stat(cpu)->list)) + ret = list_entry(workqueue_cpu_stat(cpu)->list.next, struct cpu_workqueue_stats, list); - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); return ret; } @@ -172,9 +174,9 @@ static void *workqueue_stat_next(void *prev, int idx) unsigned long flags; void *ret = NULL; - 
spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) { - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); for (++cpu ; cpu < num_possible_cpus(); cpu++) { ret = workqueue_stat_start_cpu(cpu); if (ret) @@ -182,7 +184,7 @@ static void *workqueue_stat_next(void *prev, int idx) } return NULL; } - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); return list_entry(prev_cws->list.next, struct cpu_workqueue_stats, list); @@ -199,10 +201,10 @@ static int workqueue_stat_show(struct seq_file *s, void *p) cws->executed, trace_find_cmdline(cws->pid)); - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - if (&cws->list == all_workqueue_stat[cpu].list.next) + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + if (&cws->list == workqueue_cpu_stat(cpu)->list.next) seq_printf(s, "\n"); - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); return 0; } @@ -258,17 +260,9 @@ int __init trace_workqueue_early_init(void) if (ret) goto no_creation; - all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats) - * num_possible_cpus(), GFP_KERNEL); - - if (!all_workqueue_stat) { - pr_warning("trace_workqueue: not enough memory\n"); - goto no_creation; - } - for_each_possible_cpu(cpu) { - spin_lock_init(&all_workqueue_stat[cpu].lock); - INIT_LIST_HEAD(&all_workqueue_stat[cpu].list); + spin_lock_init(&workqueue_cpu_stat(cpu)->lock); + INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list); } return 0;