From 30dd568c912602b7dbd609a45d053e01b13422bb Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 21 Jul 2009 15:56:48 +0200 Subject: [PATCH 01/60] x86, perf_counter, bts: Add BTS support to perfcounters Implement a performance counter with: attr.type = PERF_TYPE_HARDWARE attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS attr.sample_period = 1 Using branch trace store (BTS) on x86 hardware, if available. The from and to address for each branch can be sampled using: PERF_SAMPLE_IP for the from address PERF_SAMPLE_ADDR for the to address [ v2: address review feedback, fix bugs ] Signed-off-by: Markus Metzger Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 10 + arch/x86/kernel/cpu/perf_counter.c | 325 +++++++++++++++++++++++++++- include/linux/perf_counter.h | 2 + kernel/perf_counter.c | 10 +- 4 files changed, 340 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index fa64e401589d..e7b7c938ae27 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,6 +84,16 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) +/* + * We model BTS tracing as another fixed-mode PMC. + * + * We choose a value in the middle of the fixed counter range, since lower + * values are used by actual fixed counters and higher values are used + * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. + */ +#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) + + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(void); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a7aa8f900954..b237c181aa41 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -6,6 +6,7 @@ * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2009 Intel Corporation, * * For licencing details see kernel-base/COPYING */ @@ -20,6 +21,7 @@ #include #include #include +#include #include #include @@ -27,12 +29,52 @@ static u64 perf_counter_mask __read_mostly; +/* The maximal number of PEBS counters: */ +#define MAX_PEBS_COUNTERS 4 + +/* The size of a BTS record in bytes: */ +#define BTS_RECORD_SIZE 24 + +/* The size of a per-cpu BTS buffer in bytes: */ +#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) + +/* The BTS overflow threshold in bytes from the end of the buffer: */ +#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) + + +/* + * Bits in the debugctlmsr controlling branch tracing. + */ +#define X86_DEBUGCTL_TR (1 << 6) +#define X86_DEBUGCTL_BTS (1 << 7) +#define X86_DEBUGCTL_BTINT (1 << 8) +#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) +#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; +}; + struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; int enabled; + struct debug_store *ds; }; /* @@ -57,6 +99,8 @@ struct x86_pmu { u64 counter_mask; u64 max_period; u64 intel_ctrl; + void (*enable_bts)(u64 config); + void (*disable_bts)(void); }; static struct x86_pmu x86_pmu __read_mostly; @@ -576,6 +620,9 @@ x86_perf_counter_update(struct perf_counter *counter, u64 prev_raw_count, new_raw_count; s64 delta; + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + /* * Careful: an NMI might modify the previous counter value. * @@ -659,10 +706,109 @@ static void release_pmc_hardware(void) enable_lapic_nmi_watchdog(); } +static inline bool bts_available(void) +{ + return x86_pmu.enable_bts != NULL; +} + +static inline void init_debug_store_on_cpu(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + + if (!ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, + (u32)((u64)(long)ds), (u32)((u64)(long)ds >> 32)); +} + +static inline void fini_debug_store_on_cpu(int cpu) +{ + if (!per_cpu(cpu_hw_counters, cpu).ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); +} + +static void release_bts_hardware(void) +{ + int cpu; + + if (!bts_available()) + return; + + get_online_cpus(); + + for_each_online_cpu(cpu) + fini_debug_store_on_cpu(cpu); + + for_each_possible_cpu(cpu) { + struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + + if (!ds) + continue; + + per_cpu(cpu_hw_counters, cpu).ds = NULL; + + kfree((void *)(long)ds->bts_buffer_base); + kfree(ds); + } + + put_online_cpus(); +} + +static int reserve_bts_hardware(void) +{ + int cpu, err = 0; + + if (!bts_available()) + return -EOPNOTSUPP; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + struct debug_store *ds; + void *buffer; + + err = -ENOMEM; + buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + ds = kzalloc(sizeof(*ds), GFP_KERNEL); + if (unlikely(!ds)) { + kfree(buffer); + break; + } + + ds->bts_buffer_base = (u64)(long)buffer; + ds->bts_index = ds->bts_buffer_base; + ds->bts_absolute_maximum = + ds->bts_buffer_base + BTS_BUFFER_SIZE; + ds->bts_interrupt_threshold = + ds->bts_absolute_maximum - BTS_OVFL_TH; + + per_cpu(cpu_hw_counters, cpu).ds = ds; + err = 0; + } + + if (err) + release_bts_hardware(); + else { + for_each_online_cpu(cpu) + init_debug_store_on_cpu(cpu); + } + + put_online_cpus(); + + return err; +} + static void hw_perf_counter_destroy(struct perf_counter *counter) { if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { release_pmc_hardware(); + release_bts_hardware(); mutex_unlock(&pmc_reserve_mutex); } } @@ -705,6 +851,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) return 0; } +static void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr |= X86_DEBUGCTL_TR; + debugctlmsr |= X86_DEBUGCTL_BTS; + debugctlmsr |= X86_DEBUGCTL_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_bts(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | + X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + /* * Setup the hardware configuration for a given attr_type */ @@ -721,9 +903,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter) err = 0; if (!atomic_inc_not_zero(&active_counters)) { mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) - err = -EBUSY; - else + if (atomic_read(&active_counters) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + reserve_bts_hardware(); + } + if (!err) atomic_inc(&active_counters); mutex_unlock(&pmc_reserve_mutex); } @@ -801,7 +987,18 @@ static void p6_pmu_disable_all(void) static void intel_pmu_disable_all(void) { + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + intel_pmu_disable_bts(); } static void amd_pmu_disable_all(void) @@ -859,7 +1056,25 @@ static void p6_pmu_enable_all(void) static void intel_pmu_enable_all(void) { + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { + struct perf_counter *counter = + cpuc->counters[X86_PMC_IDX_FIXED_BTS]; + + if (WARN_ON_ONCE(!counter)) + return; + + intel_pmu_enable_bts(counter->hw.config); + } } static void amd_pmu_enable_all(void) @@ -946,6 +1161,11 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) static inline void intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + intel_pmu_disable_bts(); + return; + } + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc, idx); return; @@ -974,6 +1194,9 @@ x86_perf_counter_set_period(struct perf_counter *counter, s64 period = hwc->sample_period; int err, ret = 0; + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + /* * If we are way outside a reasoable range then just skip forward: */ @@ -1056,6 +1279,14 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + if (!__get_cpu_var(cpu_hw_counters).enabled) + return; + + intel_pmu_enable_bts(hwc->config); + return; + } + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_enable_fixed(hwc, idx); return; @@ -1077,11 +1308,16 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) { unsigned int event; + event = hwc->config & ARCH_PERFMON_EVENT_MASK; + + if (unlikely((event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (hwc->sample_period == 1))) + return X86_PMC_IDX_FIXED_BTS; + if (!x86_pmu.num_counters_fixed) return -1; - event = hwc->config & ARCH_PERFMON_EVENT_MASK; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) @@ -1102,7 +1338,25 @@ static int x86_pmu_enable(struct perf_counter *counter) int idx; idx = fixed_mode_idx(counter, hwc); - if (idx >= 0) { + if (idx == X86_PMC_IDX_FIXED_BTS) { + /* + * Try to use BTS for branch tracing. If that is not + * available, try to get a generic counter. + */ + if (unlikely(!cpuc->ds)) + goto try_generic; + + /* + * Try to get the fixed counter, if that is already taken + * then try to get a generic counter: + */ + if (test_and_set_bit(idx, cpuc->used_mask)) + goto try_generic; + + hwc->config_base = 0; + hwc->counter_base = 0; + hwc->idx = idx; + } else if (idx >= 0) { /* * Try to get the fixed counter, if that is already taken * then try to get a generic counter: @@ -1213,6 +1467,45 @@ void perf_counter_print_debug(void) local_irq_restore(flags); } +static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, + struct perf_sample_data *data) +{ + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; + unsigned long orig_ip = data->regs->ip; + u64 at; + + if (!counter) + return; + + if (!ds) + return; + + for (at = ds->bts_buffer_base; + at < ds->bts_index; + at += sizeof(struct bts_record)) { + struct bts_record *rec = (struct bts_record *)(long)at; + + data->regs->ip = rec->from; + data->addr = rec->to; + + perf_counter_output(counter, 1, data); + } + + ds->bts_index = ds->bts_buffer_base; + + data->regs->ip = orig_ip; + data->addr = 0; + + /* There's new data available. */ + counter->pending_kill = POLL_IN; +} + static void x86_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); @@ -1237,6 +1530,15 @@ static void x86_pmu_disable(struct perf_counter *counter) * that we are disabling: */ x86_perf_counter_update(counter, hwc, idx); + + /* Drain the remaining BTS records. */ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + struct perf_sample_data data; + struct pt_regs regs; + + data.regs = ®s; + intel_pmu_drain_bts_buffer(cpuc, &data); + } cpuc->counters[idx] = NULL; clear_bit(idx, cpuc->used_mask); @@ -1264,6 +1566,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter) static void intel_pmu_reset(void) { + struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; unsigned long flags; int idx; @@ -1281,6 +1584,8 @@ static void intel_pmu_reset(void) for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); } + if (ds) + ds->bts_index = ds->bts_buffer_base; local_irq_restore(flags); } @@ -1346,6 +1651,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_counters); perf_disable(); + intel_pmu_drain_bts_buffer(cpuc, &data); status = intel_pmu_get_status(); if (!status) { perf_enable(); @@ -1547,6 +1853,8 @@ static struct x86_pmu intel_pmu = { * the generic counter period: */ .max_period = (1ULL << 31) - 1, + .enable_bts = intel_pmu_enable_bts, + .disable_bts = intel_pmu_disable_bts, }; static struct x86_pmu amd_pmu = { @@ -1936,3 +2244,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } + +void hw_perf_counter_setup_online(int cpu) +{ + init_debug_store_on_cpu(cpu); +} diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2aabe43c1d04..0a6f3209c9de 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -692,6 +692,8 @@ struct perf_sample_data { extern int perf_counter_overflow(struct perf_counter *counter, int nmi, struct perf_sample_data *data); +extern void perf_counter_output(struct perf_counter *counter, int nmi, + struct perf_sample_data *data); /* * Return 1 for a software counter, 0 for a hardware counter diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 546e62d62941..bf8110b35c51 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } void __weak hw_perf_counter_setup(int cpu) { barrier(); } +void __weak hw_perf_counter_setup_online(int cpu) { barrier(); } int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, @@ -2630,7 +2631,7 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) return task_pid_nr_ns(p, counter->ns); } -static void perf_counter_output(struct perf_counter *counter, int nmi, +void perf_counter_output(struct perf_counter *counter, int nmi, struct perf_sample_data *data) { int ret; @@ -4566,6 +4567,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) perf_counter_init_cpu(cpu); break; + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + hw_perf_counter_setup_online(cpu); + break; + case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: perf_counter_exit_cpu(cpu); @@ -4590,6 +4596,8 @@ void __init perf_counter_init(void) { perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); + perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, + (void *)(long)smp_processor_id()); register_cpu_notifier(&perf_cpu_nb); } From 8d51327090ac025d7f4ce6c059786b5e93513321 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Fri, 7 Aug 2009 13:55:24 +0200 Subject: [PATCH 02/60] perf report: Fix and improve the displaying of per-thread event counters Improve and fix the handling of per-thread counter stats recorded via perf record -s. Previously we only displayed it in debug printouts (-D) and even that output was hard to disambiguate. I moved everything to utils/values.[ch] so that we may reuse it in perf stat. We get something like this now: # PID TID cache-misses cache-references 4658 4659 495581 3238779 4658 4662 498246 3236823 4658 4663 499531 3243162 Then it'll be easy to add --pretty=raw to display a single line per thread/event. By the way, -S was also used for --symbol... So I used -T/--thread here. perf report: Add -T/--threads to display per-thread counter values We get something like this now: # PID TID cache-misses cache-references 4658 4659 495581 3238779 4658 4662 498246 3236823 4658 4663 499531 3243162 Per-thread arrays of counter values are managed in utils/values.[ch] Signed-off-by: Brice Goglin Cc: Peter Zijlstra Cc: paulus@samba.org Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 3 + tools/perf/Makefile | 2 + tools/perf/builtin-report.c | 25 ++++ tools/perf/util/values.c | 171 +++++++++++++++++++++++ tools/perf/util/values.h | 26 ++++ 5 files changed, 227 insertions(+) create mode 100644 tools/perf/util/values.c create mode 100644 tools/perf/util/values.h diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index e72e93110782..370344afb5b2 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -27,6 +27,9 @@ OPTIONS -n --show-nr-samples Show the number of samples for each symbol +-T +--threads + Show per-thread event counters -C:: --comms=:: Only consider symbols in these comms. CSV that understands diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 60411e94113b..de7beac1095e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -310,6 +310,7 @@ LIB_H += util/sigchain.h LIB_H += util/symbol.h LIB_H += util/module.h LIB_H += util/color.h +LIB_H += util/values.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -337,6 +338,7 @@ LIB_OBJS += util/color.o LIB_OBJS += util/pager.o LIB_OBJS += util/header.o LIB_OBJS += util/callchain.o +LIB_OBJS += util/values.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 99274cec0adb..41639182fb3f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -17,6 +17,7 @@ #include "util/string.h" #include "util/callchain.h" #include "util/strlist.h" +#include "util/values.h" #include "perf.h" #include "util/header.h" @@ -53,6 +54,9 @@ static int modules; static int full_paths; static int show_nr_samples; +static int show_threads; +static struct perf_read_values show_threads_values; + static unsigned long page_size; static unsigned long mmap_window = 32; @@ -1473,6 +1477,9 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) free(rem_sq_bracket); + if (show_threads) + perf_read_values_display(fp, &show_threads_values); + return ret; } @@ -1758,6 +1765,16 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head) { struct perf_counter_attr *attr = perf_header__find_attr(event->read.id); + if (show_threads) { + char *name = attr ? __event_name(attr->type, attr->config) + : "unknown"; + perf_read_values_add_value(&show_threads_values, + event->read.pid, event->read.tid, + event->read.id, + name, + event->read.value); + } + dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", (void *)(offset + head), (void *)(long)(event->header.size), @@ -1839,6 +1856,9 @@ static int __cmd_report(void) register_idle_thread(); + if (show_threads) + perf_read_values_init(&show_threads_values); + input = open(input_name, O_RDONLY); if (input < 0) { fprintf(stderr, " failed to open file: %s", input_name); @@ -1993,6 +2013,9 @@ static int __cmd_report(void) output__resort(total); output__fprintf(stdout, total); + if (show_threads) + perf_read_values_destroy(&show_threads_values); + return rc; } @@ -2066,6 +2089,8 @@ static const struct option options[] = { "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, "Show a column with the number of samples"), + OPT_BOOLEAN('T', "threads", &show_threads, + "Show per-thread event counters"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('P', "full-paths", &full_paths, diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c new file mode 100644 index 000000000000..8551c0b8b233 --- /dev/null +++ b/tools/perf/util/values.c @@ -0,0 +1,171 @@ +#include + +#include "util.h" +#include "values.h" + +void perf_read_values_init(struct perf_read_values *values) +{ + values->threads_max = 16; + values->pid = malloc(values->threads_max * sizeof(*values->pid)); + values->tid = malloc(values->threads_max * sizeof(*values->tid)); + values->value = malloc(values->threads_max * sizeof(*values->value)); + if (!values->pid || !values->tid || !values->value) + die("failed to allocate read_values threads arrays"); + values->threads = 0; + + values->counters_max = 16; + values->counterrawid = malloc(values->counters_max + * sizeof(*values->counterrawid)); + values->countername = malloc(values->counters_max + * sizeof(*values->countername)); + if (!values->counterrawid || !values->countername) + die("failed to allocate read_values counters arrays"); + values->counters = 0; +} + +void perf_read_values_destroy(struct perf_read_values *values) +{ + int i; + + if (!values->threads_max || !values->counters_max) + return; + + for (i = 0; i < values->threads; i++) + free(values->value[i]); + free(values->pid); + free(values->tid); + free(values->counterrawid); + for (i = 0; i < values->counters; i++) + free(values->countername[i]); + free(values->countername); +} + +static void perf_read_values__enlarge_threads(struct perf_read_values *values) +{ + values->threads_max *= 2; + values->pid = realloc(values->pid, + values->threads_max * sizeof(*values->pid)); + values->tid = realloc(values->tid, + values->threads_max * sizeof(*values->tid)); + values->value = realloc(values->value, + values->threads_max * sizeof(*values->value)); + if (!values->pid || !values->tid || !values->value) + die("failed to enlarge read_values threads arrays"); +} + +static int perf_read_values__findnew_thread(struct perf_read_values *values, + u32 pid, u32 tid) +{ + int i; + + for (i = 0; i < values->threads; i++) + if (values->pid[i] == pid && values->tid[i] == tid) + return i; + + if (values->threads == values->threads_max) + perf_read_values__enlarge_threads(values); + + i = values->threads++; + values->pid[i] = pid; + values->tid[i] = tid; + values->value[i] = malloc(values->counters_max * sizeof(**values->value)); + if (!values->value[i]) + die("failed to allocate read_values counters array"); + + return i; +} + +static void perf_read_values__enlarge_counters(struct perf_read_values *values) +{ + int i; + + values->counters_max *= 2; + values->counterrawid = realloc(values->counterrawid, + values->counters_max * sizeof(*values->counterrawid)); + values->countername = realloc(values->countername, + values->counters_max * sizeof(*values->countername)); + if (!values->counterrawid || !values->countername) + die("failed to enlarge read_values counters arrays"); + + for (i = 0; i < values->threads; i++) { + values->value[i] = realloc(values->value[i], + values->counters_max * sizeof(**values->value)); + if (!values->value[i]) + die("failed to enlarge read_values counters arrays"); + } +} + +static int perf_read_values__findnew_counter(struct perf_read_values *values, + u64 rawid, char *name) +{ + int i; + + for (i = 0; i < values->counters; i++) + if (values->counterrawid[i] == rawid) + return i; + + if (values->counters == values->counters_max) + perf_read_values__enlarge_counters(values); + + i = values->counters++; + values->counterrawid[i] = rawid; + values->countername[i] = strdup(name); + + return i; +} + +void perf_read_values_add_value(struct perf_read_values *values, + u32 pid, u32 tid, + u64 rawid, char *name, u64 value) +{ + int tindex, cindex; + + tindex = perf_read_values__findnew_thread(values, pid, tid); + cindex = perf_read_values__findnew_counter(values, rawid, name); + + values->value[tindex][cindex] = value; +} + +void perf_read_values_display(FILE *fp, struct perf_read_values *values) +{ + int i, j; + int pidwidth, tidwidth; + int *counterwidth; + + counterwidth = malloc(values->counters * sizeof(*counterwidth)); + if (!counterwidth) + die("failed to allocate counterwidth array"); + tidwidth = 3; + pidwidth = 3; + for (j = 0; j < values->counters; j++) + counterwidth[j] = strlen(values->countername[j]); + for (i = 0; i < values->threads; i++) { + int width; + + width = snprintf(NULL, 0, "%d", values->pid[i]); + if (width > pidwidth) + pidwidth = width; + width = snprintf(NULL, 0, "%d", values->tid[i]); + if (width > tidwidth) + tidwidth = width; + for (j = 0; j < values->counters; j++) { + width = snprintf(NULL, 0, "%Lu", values->value[i][j]); + if (width > counterwidth[j]) + counterwidth[j] = width; + } + } + + fprintf(fp, "# %*s %*s", pidwidth, "PID", tidwidth, "TID"); + for (j = 0; j < values->counters; j++) + fprintf(fp, " %*s", counterwidth[j], values->countername[j]); + fprintf(fp, "\n"); + + for (i = 0; i < values->threads; i++) { + fprintf(fp, " %*d %*d", pidwidth, values->pid[i], + tidwidth, values->tid[i]); + for (j = 0; j < values->counters; j++) + fprintf(fp, " %*Lu", + counterwidth[j], values->value[i][j]); + fprintf(fp, "\n"); + } +} diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h new file mode 100644 index 000000000000..e41be5e86e6b --- /dev/null +++ b/tools/perf/util/values.h @@ -0,0 +1,26 @@ +#ifndef _PERF_VALUES_H +#define _PERF_VALUES_H + +#include "types.h" + +struct perf_read_values { + int threads; + int threads_max; + u32 *pid, *tid; + int counters; + int counters_max; + u64 *counterrawid; + char **countername; + u64 **value; +}; + +void perf_read_values_init(struct perf_read_values *values); +void perf_read_values_destroy(struct perf_read_values *values); + +void perf_read_values_add_value(struct perf_read_values *values, + u32 pid, u32 tid, + u64 rawid, char *name, u64 value); + +void perf_read_values_display(FILE *fp, struct perf_read_values *values); + +#endif /* _PERF_VALUES_H */ From 9f8666971185b86615a074bcac67c90fdf8af8bc Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Mon, 10 Aug 2009 15:26:32 +0200 Subject: [PATCH 03/60] perf report: Add raw displaying of per-thread counters If --pretty=raw is given to perf report -T, it now displays one line per-thread per-counter with the raw event id added. We get: # PID TID Name Raw Count 18608 18609 cache-misses 28e 416744 18608 18609 cache-references 28f 6456792 18608 18608 cache-misses 28e 448219 18608 18608 cache-references 28f 7270244 instead of: # PID TID cache-misses cache-references 18608 18609 416744 6456792 18608 18608 448219 7270244 Signed-off-by: Brice Goglin Acked-by: Peter Zijlstra LKML-Reference: <4A802008.5050409@inria.fr> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 11 ++++++- tools/perf/util/values.c | 62 ++++++++++++++++++++++++++++++++++++- tools/perf/util/values.h | 3 +- 3 files changed, 73 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 41639182fb3f..2357c66fb91d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -57,6 +57,9 @@ static int show_nr_samples; static int show_threads; static struct perf_read_values show_threads_values; +static char default_pretty_printing_style[] = "normal"; +static char *pretty_printing_style = default_pretty_printing_style; + static unsigned long page_size; static unsigned long mmap_window = 32; @@ -1401,6 +1404,9 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) size_t ret = 0; unsigned int width; char *col_width = col_width_list_str; + int raw_printing_style; + + raw_printing_style = !strcmp(pretty_printing_style, "raw"); init_rem_hits(); @@ -1478,7 +1484,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) free(rem_sq_bracket); if (show_threads) - perf_read_values_display(fp, &show_threads_values); + perf_read_values_display(fp, &show_threads_values, + raw_printing_style); return ret; } @@ -2091,6 +2098,8 @@ static const struct option options[] = { "Show a column with the number of samples"), OPT_BOOLEAN('T', "threads", &show_threads, "Show per-thread event counters"), + OPT_STRING(0, "pretty", &pretty_printing_style, "key", + "pretty printing style key: normal raw"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('P', "full-paths", &full_paths, diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 8551c0b8b233..614cfaf4712a 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -126,7 +126,8 @@ void perf_read_values_add_value(struct perf_read_values *values, values->value[tindex][cindex] = value; } -void perf_read_values_display(FILE *fp, struct perf_read_values *values) +static void perf_read_values__display_pretty(FILE *fp, + struct perf_read_values *values) { int i, j; int pidwidth, tidwidth; @@ -169,3 +170,62 @@ void perf_read_values_display(FILE *fp, struct perf_read_values *values) fprintf(fp, "\n"); } } + +static void perf_read_values__display_raw(FILE *fp, + struct perf_read_values *values) +{ + int width, pidwidth, tidwidth, namewidth, rawwidth, countwidth; + int i, j; + + tidwidth = 3; /* TID */ + pidwidth = 3; /* PID */ + namewidth = 4; /* "Name" */ + rawwidth = 3; /* "Raw" */ + countwidth = 5; /* "Count" */ + + for (i = 0; i < values->threads; i++) { + width = snprintf(NULL, 0, "%d", values->pid[i]); + if (width > pidwidth) + pidwidth = width; + width = snprintf(NULL, 0, "%d", values->tid[i]); + if (width > tidwidth) + tidwidth = width; + } + for (j = 0; j < values->counters; j++) { + width = strlen(values->countername[j]); + if (width > namewidth) + namewidth = width; + width = snprintf(NULL, 0, "%llx", values->counterrawid[j]); + if (width > rawwidth) + rawwidth = width; + } + for (i = 0; i < values->threads; i++) { + for (j = 0; j < values->counters; j++) { + width = snprintf(NULL, 0, "%Lu", values->value[i][j]); + if (width > countwidth) + countwidth = width; + } + } + + fprintf(fp, "# %*s %*s %*s %*s %*s\n", + pidwidth, "PID", tidwidth, "TID", + namewidth, "Name", rawwidth, "Raw", + countwidth, "Count"); + for (i = 0; i < values->threads; i++) + for (j = 0; j < values->counters; j++) + fprintf(fp, " %*d %*d %*s %*llx %*Lu\n", + pidwidth, values->pid[i], + tidwidth, values->tid[i], + namewidth, values->countername[j], + rawwidth, values->counterrawid[j], + countwidth, values->value[i][j]); +} + +void perf_read_values_display(FILE *fp, struct perf_read_values *values, + int raw) +{ + if (raw) + perf_read_values__display_raw(fp, values); + else + perf_read_values__display_pretty(fp, values); +} diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h index e41be5e86e6b..f8960fde0547 100644 --- a/tools/perf/util/values.h +++ b/tools/perf/util/values.h @@ -21,6 +21,7 @@ void perf_read_values_add_value(struct perf_read_values *values, u32 pid, u32 tid, u64 rawid, char *name, u64 value); -void perf_read_values_display(FILE *fp, struct perf_read_values *values); +void perf_read_values_display(FILE *fp, struct perf_read_values *values, + int raw); #endif /* _PERF_VALUES_H */ From cd84c2ac6d6425dd4d1b80a2231e534b9b03df18 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 12 Aug 2009 10:03:49 +0200 Subject: [PATCH 04/60] perf tools: Factorize high level dso helpers Factorize multiple definitions of high level dso helpers into the symbol source file. The side effect is a general export of the verbose and eprintf debugging helpers into a new file dedicated to debugging purposes. Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Brice Goglin --- tools/perf/Makefile | 1 + tools/perf/builtin-annotate.c | 96 ---------------------------------- tools/perf/builtin-record.c | 1 - tools/perf/builtin-report.c | 97 ----------------------------------- tools/perf/builtin-stat.c | 1 - tools/perf/builtin-top.c | 4 -- tools/perf/builtin.h | 1 + tools/perf/perf.h | 1 + tools/perf/util/debug.c | 22 ++++++++ tools/perf/util/debug.h | 5 ++ tools/perf/util/symbol.c | 97 +++++++++++++++++++++++++++++++++++ tools/perf/util/symbol.h | 11 ++++ 12 files changed, 138 insertions(+), 199 deletions(-) create mode 100644 tools/perf/util/debug.c create mode 100644 tools/perf/util/debug.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index de7beac1095e..2aee21baf785 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -339,6 +339,7 @@ LIB_OBJS += util/pager.o LIB_OBJS += util/header.o LIB_OBJS += util/callchain.o LIB_OBJS += util/values.o +LIB_OBJS += util/debug.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 1dba568e1941..1a792990031a 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -26,7 +26,6 @@ #define SHOW_HV 4 static char const *input_name = "perf.data"; -static char *vmlinux = "vmlinux"; static char default_sort_order[] = "comm,symbol"; static char *sort_order = default_sort_order; @@ -37,9 +36,6 @@ static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; static int dump_trace = 0; #define dprintf(x...) do { if (dump_trace) printf(x); } while (0) -static int verbose; - -static int modules; static int full_paths; @@ -89,98 +85,6 @@ struct sym_ext { char *path; }; -static LIST_HEAD(dsos); -static struct dso *kernel_dso; -static struct dso *vdso; - - -static void dsos__add(struct dso *dso) -{ - list_add_tail(&dso->node, &dsos); -} - -static struct dso *dsos__find(const char *name) -{ - struct dso *pos; - - list_for_each_entry(pos, &dsos, node) - if (strcmp(pos->name, name) == 0) - return pos; - return NULL; -} - -static struct dso *dsos__findnew(const char *name) -{ - struct dso *dso = dsos__find(name); - int nr; - - if (dso) - return dso; - - dso = dso__new(name, 0); - if (!dso) - goto out_delete_dso; - - nr = dso__load(dso, NULL, verbose); - if (nr < 0) { - if (verbose) - fprintf(stderr, "Failed to open: %s\n", name); - goto out_delete_dso; - } - if (!nr && verbose) { - fprintf(stderr, - "No symbols found in: %s, maybe install a debug package?\n", - name); - } - - dsos__add(dso); - - return dso; - -out_delete_dso: - dso__delete(dso); - return NULL; -} - -static void dsos__fprintf(FILE *fp) -{ - struct dso *pos; - - list_for_each_entry(pos, &dsos, node) - dso__fprintf(pos, fp); -} - -static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) -{ - return dso__find_symbol(dso, ip); -} - -static int load_kernel(void) -{ - int err; - - kernel_dso = dso__new("[kernel]", 0); - if (!kernel_dso) - return -1; - - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, modules); - if (err <= 0) { - dso__delete(kernel_dso); - kernel_dso = NULL; - } else - dsos__add(kernel_dso); - - vdso = dso__new("[vdso]", 0); - if (!vdso) - return -1; - - vdso->find_symbol = vdso__find_symbol; - - dsos__add(vdso); - - return err; -} - struct map { struct list_head node; u64 start; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0345aad8eba5..afae3873b47c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -40,7 +40,6 @@ static int inherit = 1; static int force = 0; static int append_file = 0; static int call_graph = 0; -static int verbose = 0; static int inherit_stat = 0; static int no_samples = 0; static int sample_address = 0; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2357c66fb91d..827eab2edf4b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -30,7 +30,6 @@ #define SHOW_HV 4 static char const *input_name = "perf.data"; -static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso,symbol"; static char *sort_order = default_sort_order; @@ -46,11 +45,6 @@ static int dump_trace = 0; #define dprintf(x...) do { if (dump_trace) printf(x); } while (0) #define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0) -static int verbose; -#define eprintf(x...) do { if (verbose) fprintf(stderr, x); } while (0) - -static int modules; - static int full_paths; static int show_nr_samples; @@ -161,98 +155,7 @@ static int repsep_fprintf(FILE *fp, const char *fmt, ...) return n; } -static LIST_HEAD(dsos); -static struct dso *kernel_dso; -static struct dso *vdso; -static struct dso *hypervisor_dso; -static void dsos__add(struct dso *dso) -{ - list_add_tail(&dso->node, &dsos); -} - -static struct dso *dsos__find(const char *name) -{ - struct dso *pos; - - list_for_each_entry(pos, &dsos, node) - if (strcmp(pos->name, name) == 0) - return pos; - return NULL; -} - -static struct dso *dsos__findnew(const char *name) -{ - struct dso *dso = dsos__find(name); - int nr; - - if (dso) - return dso; - - dso = dso__new(name, 0); - if (!dso) - goto out_delete_dso; - - nr = dso__load(dso, NULL, verbose); - if (nr < 0) { - eprintf("Failed to open: %s\n", name); - goto out_delete_dso; - } - if (!nr) - eprintf("No symbols found in: %s, maybe install a debug package?\n", name); - - dsos__add(dso); - - return dso; - -out_delete_dso: - dso__delete(dso); - return NULL; -} - -static void dsos__fprintf(FILE *fp) -{ - struct dso *pos; - - list_for_each_entry(pos, &dsos, node) - dso__fprintf(pos, fp); -} - -static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) -{ - return dso__find_symbol(dso, ip); -} - -static int load_kernel(void) -{ - int err; - - kernel_dso = dso__new("[kernel]", 0); - if (!kernel_dso) - return -1; - - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, modules); - if (err <= 0) { - dso__delete(kernel_dso); - kernel_dso = NULL; - } else - dsos__add(kernel_dso); - - vdso = dso__new("[vdso]", 0); - if (!vdso) - return -1; - - vdso->find_symbol = vdso__find_symbol; - - dsos__add(vdso); - - hypervisor_dso = dso__new("[hypervisor]", 0); - if (!hypervisor_dso) - return -1; - dsos__add(hypervisor_dso); - - return err; -} static char __cwd[PATH_MAX]; static char *cwd = __cwd; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b4b06c7903e1..4b9dd4af61a6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -63,7 +63,6 @@ static struct perf_counter_attr default_attrs[] = { #define MAX_RUN 100 static int system_wide = 0; -static int verbose = 0; static unsigned int nr_cpus = 0; static int run_idx = 0; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7de28ce9ca26..0aa567371bd3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -68,8 +68,6 @@ static int group = 0; static unsigned int page_size; static unsigned int mmap_pages = 16; static int freq = 0; -static int verbose = 0; -static char *vmlinux = NULL; static int delay_secs = 2; static int zero; @@ -338,8 +336,6 @@ static void show_details(struct sym_entry *syme) printf("%d lines not displayed, maybe increase display entries [e]\n", more); } -struct dso *kernel_dso; - /* * Symbols will be added here in record_ip and will get out * after decayed. diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 51d168230ee7..3a63e41fb44e 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -22,5 +22,6 @@ extern int cmd_stat(int argc, const char **argv, const char *prefix); extern int cmd_top(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); extern int cmd_list(int argc, const char **argv, const char *prefix); +extern int cmd_trace(int argc, const char **argv, const char *prefix); #endif diff --git a/tools/perf/perf.h b/tools/perf/perf.h index e5148e2b6134..f5509213f030 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -48,6 +48,7 @@ #include "../../include/linux/perf_counter.h" #include "util/types.h" +#include "util/debug.h" /* * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c new file mode 100644 index 000000000000..7cb8464abe61 --- /dev/null +++ b/tools/perf/util/debug.c @@ -0,0 +1,22 @@ +/* For general debugging purposes */ + +#include "../perf.h" +#include +#include +#include + +int verbose = 0; + +int eprintf(const char *fmt, ...) +{ + va_list args; + int ret = 0; + + if (verbose) { + va_start(args, fmt); + ret = vfprintf(stderr, fmt, args); + va_end(args); + } + + return ret; +} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h new file mode 100644 index 000000000000..2ae9090108d3 --- /dev/null +++ b/tools/perf/util/debug.h @@ -0,0 +1,5 @@ +/* For debugging general purposes */ + +extern int verbose; + +int eprintf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f1dcede14307..e9b13b414955 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -937,6 +937,103 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, return err; } +LIST_HEAD(dsos); +struct dso *kernel_dso; +struct dso *vdso; +struct dso *hypervisor_dso; + +char *vmlinux = "vmlinux"; +int modules; + +static void dsos__add(struct dso *dso) +{ + list_add_tail(&dso->node, &dsos); +} + +static struct dso *dsos__find(const char *name) +{ + struct dso *pos; + + list_for_each_entry(pos, &dsos, node) + if (strcmp(pos->name, name) == 0) + return pos; + return NULL; +} + +struct dso *dsos__findnew(const char *name) +{ + struct dso *dso = dsos__find(name); + int nr; + + if (dso) + return dso; + + dso = dso__new(name, 0); + if (!dso) + goto out_delete_dso; + + nr = dso__load(dso, NULL, verbose); + if (nr < 0) { + eprintf("Failed to open: %s\n", name); + goto out_delete_dso; + } + if (!nr) + eprintf("No symbols found in: %s, maybe install a debug package?\n", name); + + dsos__add(dso); + + return dso; + +out_delete_dso: + dso__delete(dso); + return NULL; +} + +void dsos__fprintf(FILE *fp) +{ + struct dso *pos; + + list_for_each_entry(pos, &dsos, node) + dso__fprintf(pos, fp); +} + +static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) +{ + return dso__find_symbol(dso, ip); +} + +int load_kernel(void) +{ + int err; + + kernel_dso = dso__new("[kernel]", 0); + if (!kernel_dso) + return -1; + + err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, modules); + if (err <= 0) { + dso__delete(kernel_dso); + kernel_dso = NULL; + } else + dsos__add(kernel_dso); + + vdso = dso__new("[vdso]", 0); + if (!vdso) + return -1; + + vdso->find_symbol = vdso__find_symbol; + + dsos__add(vdso); + + hypervisor_dso = dso__new("[hypervisor]", 0); + if (!hypervisor_dso) + return -1; + dsos__add(hypervisor_dso); + + return err; +} + + void symbol__init(void) { elf_version(EV_CURRENT); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 1e003ec2f4b1..f3490fcd40ee 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -48,9 +48,20 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, symbol_filter_t filter, int verbose, int modules); int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose); int dso__load(struct dso *self, symbol_filter_t filter, int verbose); +struct dso *dsos__findnew(const char *name); +void dsos__fprintf(FILE *fp); size_t dso__fprintf(struct dso *self, FILE *fp); char dso__symtab_origin(const struct dso *self); +int load_kernel(void); + void symbol__init(void); + +extern struct list_head dsos; +extern struct dso *kernel_dso; +extern struct dso *vdso; +extern struct dso *hypervisor_dso; +extern char *vmlinux; +extern int modules; #endif /* _PERF_SYMBOL_ */ From 1fe2c1066ce6a30bda7b27785ee3d9b8e62ffbbd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 12 Aug 2009 10:19:53 +0200 Subject: [PATCH 05/60] perf tools: Factorize the event structure definitions in a single file Factorize the multiple definition of the events structures into a single util/event.h file. Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Brice Goglin --- tools/perf/builtin-annotate.c | 34 ---------------------- tools/perf/builtin-record.c | 18 ------------ tools/perf/builtin-report.c | 53 ---------------------------------- tools/perf/builtin-top.c | 20 ------------- tools/perf/util/event.h | 54 +++++++++++++++++++++++++++++++++++ tools/perf/util/util.h | 2 ++ 6 files changed, 56 insertions(+), 125 deletions(-) create mode 100644 tools/perf/util/event.h diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 1a792990031a..fee663adeea2 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -44,40 +44,6 @@ static int print_line; static unsigned long page_size; static unsigned long mmap_window = 32; -struct ip_event { - struct perf_event_header header; - u64 ip; - u32 pid, tid; -}; - -struct mmap_event { - struct perf_event_header header; - u32 pid, tid; - u64 start; - u64 len; - u64 pgoff; - char filename[PATH_MAX]; -}; - -struct comm_event { - struct perf_event_header header; - u32 pid, tid; - char comm[16]; -}; - -struct fork_event { - struct perf_event_header header; - u32 pid, ppid; -}; - -typedef union event_union { - struct perf_event_header header; - struct ip_event ip; - struct mmap_event mmap; - struct comm_event comm; - struct fork_event fork; -} event_t; - struct sym_ext { struct rb_node node; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index afae3873b47c..718b8f7b6aac 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -59,24 +59,6 @@ static int file_new = 1; struct perf_header *header; -struct mmap_event { - struct perf_event_header header; - u32 pid; - u32 tid; - u64 start; - u64 len; - u64 pgoff; - char filename[PATH_MAX]; -}; - -struct comm_event { - struct perf_event_header header; - u32 pid; - u32 tid; - char comm[16]; -}; - - struct mmap_data { int counter; void *base; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 827eab2edf4b..1efefcc2ffdf 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -75,59 +75,6 @@ struct callchain_param callchain_param = { static u64 sample_type; -struct ip_event { - struct perf_event_header header; - u64 ip; - u32 pid, tid; - unsigned char __more_data[]; -}; - -struct mmap_event { - struct perf_event_header header; - u32 pid, tid; - u64 start; - u64 len; - u64 pgoff; - char filename[PATH_MAX]; -}; - -struct comm_event { - struct perf_event_header header; - u32 pid, tid; - char comm[16]; -}; - -struct fork_event { - struct perf_event_header header; - u32 pid, ppid; - u32 tid, ptid; -}; - -struct lost_event { - struct perf_event_header header; - u64 id; - u64 lost; -}; - -struct read_event { - struct perf_event_header header; - u32 pid,tid; - u64 value; - u64 time_enabled; - u64 time_running; - u64 id; -}; - -typedef union event_union { - struct perf_event_header header; - struct ip_event ip; - struct mmap_event mmap; - struct comm_event comm; - struct fork_event fork; - struct lost_event lost; - struct read_event read; -} event_t; - static int repsep_fprintf(FILE *fp, const char *fmt, ...) { int n; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0aa567371bd3..9a6dbbff9a9f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -933,26 +933,6 @@ static void mmap_read_counter(struct mmap_data *md) last_read = this_read; for (; old != head;) { - struct ip_event { - struct perf_event_header header; - u64 ip; - u32 pid, target_pid; - }; - struct mmap_event { - struct perf_event_header header; - u32 pid, target_pid; - u64 start; - u64 len; - u64 pgoff; - char filename[PATH_MAX]; - }; - - typedef union event_union { - struct perf_event_header header; - struct ip_event ip; - struct mmap_event mmap; - } event_t; - event_t *event = (event_t *)&data[old & md->mask]; event_t event_copy; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h new file mode 100644 index 000000000000..91e2fe589f27 --- /dev/null +++ b/tools/perf/util/event.h @@ -0,0 +1,54 @@ +#include "../perf.h" + +struct ip_event { + struct perf_event_header header; + u64 ip; + u32 pid, tid; + unsigned char __more_data[]; +}; + +struct mmap_event { + struct perf_event_header header; + u32 pid, tid; + u64 start; + u64 len; + u64 pgoff; + char filename[PATH_MAX]; +}; + +struct comm_event { + struct perf_event_header header; + u32 pid, tid; + char comm[16]; +}; + +struct fork_event { + struct perf_event_header header; + u32 pid, ppid; + u32 tid, ptid; +}; + +struct lost_event { + struct perf_event_header header; + u64 id; + u64 lost; +}; + +struct read_event { + struct perf_event_header header; + u32 pid,tid; + u64 value; + u64 time_enabled; + u64 time_running; + u64 id; +}; + +typedef union event_union { + struct perf_event_header header; + struct ip_event ip; + struct mmap_event mmap; + struct comm_event comm; + struct fork_event fork; + struct lost_event lost; + struct read_event read; +} event_t; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 68fe157d72fb..be4b52cca2c3 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -83,6 +83,8 @@ #include #include "../../../include/linux/magic.h" +#include "event.h" + #ifndef NO_ICONV #include #endif From 66e274f3b8d7fc89d38997e85b900e188f8d5cc0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 12 Aug 2009 11:07:25 +0200 Subject: [PATCH 06/60] perf tools: Factorize the map helpers Factorize the dso mapping helpers into a single purpose common file "util/map.c" Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Brice Goglin --- tools/perf/Makefile | 1 + tools/perf/builtin-annotate.c | 79 +--------------------- tools/perf/builtin-record.c | 1 + tools/perf/builtin-report.c | 124 ++-------------------------------- tools/perf/util/callchain.h | 1 + tools/perf/util/event.h | 30 ++++++++ tools/perf/util/map.c | 97 ++++++++++++++++++++++++++ tools/perf/util/symbol.h | 1 + tools/perf/util/util.h | 1 - 9 files changed, 137 insertions(+), 198 deletions(-) create mode 100644 tools/perf/util/map.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 2aee21baf785..cb9033d3f72f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -340,6 +340,7 @@ LIB_OBJS += util/header.o LIB_OBJS += util/callchain.o LIB_OBJS += util/values.o LIB_OBJS += util/debug.o +LIB_OBJS += util/map.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index fee663adeea2..543c4524f8c2 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -51,83 +51,6 @@ struct sym_ext { char *path; }; -struct map { - struct list_head node; - u64 start; - u64 end; - u64 pgoff; - u64 (*map_ip)(struct map *, u64); - struct dso *dso; -}; - -static u64 map__map_ip(struct map *map, u64 ip) -{ - return ip - map->start + map->pgoff; -} - -static u64 vdso__map_ip(struct map *map __used, u64 ip) -{ - return ip; -} - -static struct map *map__new(struct mmap_event *event) -{ - struct map *self = malloc(sizeof(*self)); - - if (self != NULL) { - const char *filename = event->filename; - - self->start = event->start; - self->end = event->start + event->len; - self->pgoff = event->pgoff; - - self->dso = dsos__findnew(filename); - if (self->dso == NULL) - goto out_delete; - - if (self->dso == vdso) - self->map_ip = vdso__map_ip; - else - self->map_ip = map__map_ip; - } - return self; -out_delete: - free(self); - return NULL; -} - -static struct map *map__clone(struct map *self) -{ - struct map *map = malloc(sizeof(*self)); - - if (!map) - return NULL; - - memcpy(map, self, sizeof(*self)); - - return map; -} - -static int map__overlap(struct map *l, struct map *r) -{ - if (l->start > r->start) { - struct map *t = l; - l = r; - r = t; - } - - if (l->end > r->start) - return 1; - - return 0; -} - -static size_t map__fprintf(struct map *self, FILE *fp) -{ - return fprintf(fp, " %Lx-%Lx %Lx %s\n", - self->start, self->end, self->pgoff, self->dso->name); -} - struct thread { struct rb_node rb_node; @@ -797,7 +720,7 @@ static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { struct thread *thread = threads__findnew(event->mmap.pid); - struct map *map = map__new(&event->mmap); + struct map *map = map__new(&event->mmap, NULL, 0); dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 718b8f7b6aac..106c6abd1c38 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -15,6 +15,7 @@ #include "util/string.h" #include "util/header.h" +#include "util/event.h" #include #include diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1efefcc2ffdf..93945ecdac86 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -67,6 +67,10 @@ static char callchain_default_opt[] = "fractal,0.5"; static int callchain; +static char __cwd[PATH_MAX]; +static char *cwd = __cwd; +static int cwdlen; + static struct callchain_param callchain_param = { .mode = CHAIN_GRAPH_REL, @@ -102,124 +106,6 @@ static int repsep_fprintf(FILE *fp, const char *fmt, ...) return n; } - - -static char __cwd[PATH_MAX]; -static char *cwd = __cwd; -static int cwdlen; - -static int strcommon(const char *pathname) -{ - int n = 0; - - while (n < cwdlen && pathname[n] == cwd[n]) - ++n; - - return n; -} - -struct map { - struct list_head node; - u64 start; - u64 end; - u64 pgoff; - u64 (*map_ip)(struct map *, u64); - struct dso *dso; -}; - -static u64 map__map_ip(struct map *map, u64 ip) -{ - return ip - map->start + map->pgoff; -} - -static u64 vdso__map_ip(struct map *map __used, u64 ip) -{ - return ip; -} - -static inline int is_anon_memory(const char *filename) -{ - return strcmp(filename, "//anon") == 0; -} - -static struct map *map__new(struct mmap_event *event) -{ - struct map *self = malloc(sizeof(*self)); - - if (self != NULL) { - const char *filename = event->filename; - char newfilename[PATH_MAX]; - int anon; - - if (cwd) { - int n = strcommon(filename); - - if (n == cwdlen) { - snprintf(newfilename, sizeof(newfilename), - ".%s", filename + n); - filename = newfilename; - } - } - - anon = is_anon_memory(filename); - - if (anon) { - snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", event->pid); - filename = newfilename; - } - - self->start = event->start; - self->end = event->start + event->len; - self->pgoff = event->pgoff; - - self->dso = dsos__findnew(filename); - if (self->dso == NULL) - goto out_delete; - - if (self->dso == vdso || anon) - self->map_ip = vdso__map_ip; - else - self->map_ip = map__map_ip; - } - return self; -out_delete: - free(self); - return NULL; -} - -static struct map *map__clone(struct map *self) -{ - struct map *map = malloc(sizeof(*self)); - - if (!map) - return NULL; - - memcpy(map, self, sizeof(*self)); - - return map; -} - -static int map__overlap(struct map *l, struct map *r) -{ - if (l->start > r->start) { - struct map *t = l; - l = r; - r = t; - } - - if (l->end > r->start) - return 1; - - return 0; -} - -static size_t map__fprintf(struct map *self, FILE *fp) -{ - return fprintf(fp, " %Lx-%Lx %Lx %s\n", - self->start, self->end, self->pgoff, self->dso->name); -} - - struct thread { struct rb_node rb_node; struct list_head maps; @@ -1474,7 +1360,7 @@ static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { struct thread *thread = threads__findnew(event->mmap.pid); - struct map *map = map__new(&event->mmap); + struct map *map = map__new(&event->mmap, cwd, cwdlen); dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index a926ae4f5a16..43cf3ea9e088 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -4,6 +4,7 @@ #include "../perf.h" #include #include +#include "util.h" #include "symbol.h" enum chain_mode { diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 91e2fe589f27..d26dc887ce52 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -1,4 +1,8 @@ +#ifndef __PERF_EVENT_H +#define __PERF_EVENT_H #include "../perf.h" +#include "util.h" +#include struct ip_event { struct perf_event_header header; @@ -52,3 +56,29 @@ typedef union event_union { struct lost_event lost; struct read_event read; } event_t; + +struct map { + struct list_head node; + u64 start; + u64 end; + u64 pgoff; + u64 (*map_ip)(struct map *, u64); + struct dso *dso; +}; + +static inline u64 map__map_ip(struct map *map, u64 ip) +{ + return ip - map->start + map->pgoff; +} + +static inline u64 vdso__map_ip(struct map *map __used, u64 ip) +{ + return ip; +} + +struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen); +struct map *map__clone(struct map *self); +int map__overlap(struct map *l, struct map *r); +size_t map__fprintf(struct map *self, FILE *fp); + +#endif diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c new file mode 100644 index 000000000000..804e02382739 --- /dev/null +++ b/tools/perf/util/map.c @@ -0,0 +1,97 @@ +#include "event.h" +#include "symbol.h" +#include +#include +#include + +static inline int is_anon_memory(const char *filename) +{ + return strcmp(filename, "//anon") == 0; +} + +static int strcommon(const char *pathname, char *cwd, int cwdlen) +{ + int n = 0; + + while (n < cwdlen && pathname[n] == cwd[n]) + ++n; + + return n; +} + + struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen) +{ + struct map *self = malloc(sizeof(*self)); + + if (self != NULL) { + const char *filename = event->filename; + char newfilename[PATH_MAX]; + int anon; + + if (cwd) { + int n = strcommon(filename, cwd, cwdlen); + + if (n == cwdlen) { + snprintf(newfilename, sizeof(newfilename), + ".%s", filename + n); + filename = newfilename; + } + } + + anon = is_anon_memory(filename); + + if (anon) { + snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", event->pid); + filename = newfilename; + } + + self->start = event->start; + self->end = event->start + event->len; + self->pgoff = event->pgoff; + + self->dso = dsos__findnew(filename); + if (self->dso == NULL) + goto out_delete; + + if (self->dso == vdso || anon) + self->map_ip = vdso__map_ip; + else + self->map_ip = map__map_ip; + } + return self; +out_delete: + free(self); + return NULL; +} + +struct map *map__clone(struct map *self) +{ + struct map *map = malloc(sizeof(*self)); + + if (!map) + return NULL; + + memcpy(map, self, sizeof(*self)); + + return map; +} + +int map__overlap(struct map *l, struct map *r) +{ + if (l->start > r->start) { + struct map *t = l; + l = r; + r = t; + } + + if (l->end > r->start) + return 1; + + return 0; +} + +size_t map__fprintf(struct map *self, FILE *fp) +{ + return fprintf(fp, " %Lx-%Lx %Lx %s\n", + self->start, self->end, self->pgoff, self->dso->name); +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f3490fcd40ee..50f723571241 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -6,6 +6,7 @@ #include #include #include "module.h" +#include "event.h" struct symbol { struct rb_node rb_node; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index be4b52cca2c3..d61a6f037631 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -83,7 +83,6 @@ #include #include "../../../include/linux/magic.h" -#include "event.h" #ifndef NO_ICONV #include From 18408ddc01136f505ae357c03f0d8e50b10e0db6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 13 Aug 2009 11:47:55 +0200 Subject: [PATCH 07/60] perf tools: Add some comments to the event definitions Just to make it clear that these are _not_ generic event structures but do rely on the counter configuration. Signed-off-by: Peter Zijlstra Cc: Corey J Ashford Cc: Paul Mackerras Cc: stephane eranian LKML-Reference: <20090813103655.334194326@chello.nl> Signed-off-by: Ingo Molnar --- tools/perf/util/event.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index d26dc887ce52..fa7c50b654e1 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -4,6 +4,9 @@ #include "util.h" #include +/* + * PERF_SAMPLE_IP | PERF_SAMPLE_TID | * + */ struct ip_event { struct perf_event_header header; u64 ip; @@ -38,6 +41,9 @@ struct lost_event { u64 lost; }; +/* + * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID + */ struct read_event { struct perf_event_header header; u32 pid,tid; From 6baa0a5ae0954fb2486c480a20556a9f1aee0965 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 14 Aug 2009 12:21:53 +0200 Subject: [PATCH 08/60] perf tools: Factorize the thread code in a dedicated file Factorize the thread management code used by perf-annotate and perf-report in dedicated source and header files. v2: pass last_match by address so that it can actually be modified. Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1250245313-6995-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 1 + tools/perf/builtin-annotate.c | 173 +++------------------------- tools/perf/builtin-report.c | 205 ++++------------------------------ tools/perf/util/thread.c | 143 ++++++++++++++++++++++++ tools/perf/util/thread.h | 19 ++++ 5 files changed, 202 insertions(+), 339 deletions(-) create mode 100644 tools/perf/util/thread.c create mode 100644 tools/perf/util/thread.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 68218cfd38b3..0056405e4c93 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -341,6 +341,7 @@ LIB_OBJS += util/callchain.o LIB_OBJS += util/values.o LIB_OBJS += util/debug.o LIB_OBJS += util/map.o +LIB_OBJS += util/thread.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 543c4524f8c2..3bedaa5d21d2 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -20,6 +20,7 @@ #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/thread.h" #define SHOW_KERNEL 1 #define SHOW_USER 2 @@ -44,6 +45,9 @@ static int print_line; static unsigned long page_size; static unsigned long mmap_window = 32; +static struct rb_root threads; +static struct thread *last_match; + struct sym_ext { struct rb_node node; @@ -51,154 +55,6 @@ struct sym_ext { char *path; }; - -struct thread { - struct rb_node rb_node; - struct list_head maps; - pid_t pid; - char *comm; -}; - -static struct thread *thread__new(pid_t pid) -{ - struct thread *self = malloc(sizeof(*self)); - - if (self != NULL) { - self->pid = pid; - self->comm = malloc(32); - if (self->comm) - snprintf(self->comm, 32, ":%d", self->pid); - INIT_LIST_HEAD(&self->maps); - } - - return self; -} - -static int thread__set_comm(struct thread *self, const char *comm) -{ - if (self->comm) - free(self->comm); - self->comm = strdup(comm); - return self->comm ? 0 : -ENOMEM; -} - -static size_t thread__fprintf(struct thread *self, FILE *fp) -{ - struct map *pos; - size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); - - list_for_each_entry(pos, &self->maps, node) - ret += map__fprintf(pos, fp); - - return ret; -} - - -static struct rb_root threads; -static struct thread *last_match; - -static struct thread *threads__findnew(pid_t pid) -{ - struct rb_node **p = &threads.rb_node; - struct rb_node *parent = NULL; - struct thread *th; - - /* - * Font-end cache - PID lookups come in blocks, - * so most of the time we dont have to look up - * the full rbtree: - */ - if (last_match && last_match->pid == pid) - return last_match; - - while (*p != NULL) { - parent = *p; - th = rb_entry(parent, struct thread, rb_node); - - if (th->pid == pid) { - last_match = th; - return th; - } - - if (pid < th->pid) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - th = thread__new(pid); - if (th != NULL) { - rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &threads); - last_match = th; - } - - return th; -} - -static void thread__insert_map(struct thread *self, struct map *map) -{ - struct map *pos, *tmp; - - list_for_each_entry_safe(pos, tmp, &self->maps, node) { - if (map__overlap(pos, map)) { - list_del_init(&pos->node); - /* XXX leaks dsos */ - free(pos); - } - } - - list_add_tail(&map->node, &self->maps); -} - -static int thread__fork(struct thread *self, struct thread *parent) -{ - struct map *map; - - if (self->comm) - free(self->comm); - self->comm = strdup(parent->comm); - if (!self->comm) - return -ENOMEM; - - list_for_each_entry(map, &parent->maps, node) { - struct map *new = map__clone(map); - if (!new) - return -ENOMEM; - thread__insert_map(self, new); - } - - return 0; -} - -static struct map *thread__find_map(struct thread *self, u64 ip) -{ - struct map *pos; - - if (self == NULL) - return NULL; - - list_for_each_entry(pos, &self->maps, node) - if (ip >= pos->start && ip <= pos->end) - return pos; - - return NULL; -} - -static size_t threads__fprintf(FILE *fp) -{ - size_t ret = 0; - struct rb_node *nd; - - for (nd = rb_first(&threads); nd; nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread, rb_node); - - ret += thread__fprintf(pos, fp); - } - - return ret; -} - /* * histogram, sorted on item, collects counts */ @@ -624,7 +480,7 @@ static void output__resort(void) static void register_idle_thread(void) { - struct thread *thread = threads__findnew(0); + struct thread *thread = threads__findnew(0, &threads, &last_match); if (thread == NULL || thread__set_comm(thread, "[idle]")) { @@ -645,10 +501,12 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) char level; int show = 0; struct dso *dso = NULL; - struct thread *thread = threads__findnew(event->ip.pid); + struct thread *thread; u64 ip = event->ip.ip; struct map *map = NULL; + thread = threads__findnew(event->ip.pid, &threads, &last_match); + dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", (void *)(offset + head), (void *)(long)(event->header.size), @@ -719,9 +577,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread = threads__findnew(event->mmap.pid); + struct thread *thread; struct map *map = map__new(&event->mmap, NULL, 0); + thread = threads__findnew(event->mmap.pid, &threads, &last_match); + dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), @@ -745,8 +605,9 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread = threads__findnew(event->comm.pid); + struct thread *thread; + thread = threads__findnew(event->comm.pid, &threads, &last_match); dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), @@ -765,9 +626,11 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) static int process_fork_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread = threads__findnew(event->fork.pid); - struct thread *parent = threads__findnew(event->fork.ppid); + struct thread *thread; + struct thread *parent; + thread = threads__findnew(event->fork.pid, &threads, &last_match); + parent = threads__findnew(event->fork.ppid, &threads, &last_match); dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), @@ -1202,7 +1065,7 @@ static int __cmd_annotate(void) return 0; if (verbose >= 3) - threads__fprintf(stdout); + threads__fprintf(stdout, &threads); if (verbose >= 2) dsos__fprintf(stdout); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 6321951fe1bf..298f26b8ac78 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -25,6 +25,8 @@ #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/thread.h" + #define SHOW_KERNEL 1 #define SHOW_USER 2 #define SHOW_HV 4 @@ -71,6 +73,9 @@ static char __cwd[PATH_MAX]; static char *cwd = __cwd; static int cwdlen; +static struct rb_root threads; +static struct thread *last_match; + static struct callchain_param callchain_param = { .mode = CHAIN_GRAPH_REL, @@ -106,187 +111,10 @@ static int repsep_fprintf(FILE *fp, const char *fmt, ...) return n; } -struct thread { - struct rb_node rb_node; - struct list_head maps; - pid_t pid; - char *comm; -}; - -static struct thread *thread__new(pid_t pid) -{ - struct thread *self = malloc(sizeof(*self)); - - if (self != NULL) { - self->pid = pid; - self->comm = malloc(32); - if (self->comm) - snprintf(self->comm, 32, ":%d", self->pid); - INIT_LIST_HEAD(&self->maps); - } - - return self; -} - static unsigned int dsos__col_width, comms__col_width, threads__col_width; -static int thread__set_comm(struct thread *self, const char *comm) -{ - if (self->comm) - free(self->comm); - self->comm = strdup(comm); - if (!self->comm) - return -ENOMEM; - - if (!col_width_list_str && !field_sep && - (!comm_list || strlist__has_entry(comm_list, comm))) { - unsigned int slen = strlen(comm); - if (slen > comms__col_width) { - comms__col_width = slen; - threads__col_width = slen + 6; - } - } - - return 0; -} - -static size_t thread__fprintf(struct thread *self, FILE *fp) -{ - struct map *pos; - size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); - - list_for_each_entry(pos, &self->maps, node) - ret += map__fprintf(pos, fp); - - return ret; -} - - -static struct rb_root threads; -static struct thread *last_match; - -static struct thread *threads__findnew(pid_t pid) -{ - struct rb_node **p = &threads.rb_node; - struct rb_node *parent = NULL; - struct thread *th; - - /* - * Font-end cache - PID lookups come in blocks, - * so most of the time we dont have to look up - * the full rbtree: - */ - if (last_match && last_match->pid == pid) - return last_match; - - while (*p != NULL) { - parent = *p; - th = rb_entry(parent, struct thread, rb_node); - - if (th->pid == pid) { - last_match = th; - return th; - } - - if (pid < th->pid) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - th = thread__new(pid); - if (th != NULL) { - rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &threads); - last_match = th; - } - - return th; -} - -static void thread__insert_map(struct thread *self, struct map *map) -{ - struct map *pos, *tmp; - - list_for_each_entry_safe(pos, tmp, &self->maps, node) { - if (map__overlap(pos, map)) { - if (verbose >= 2) { - printf("overlapping maps:\n"); - map__fprintf(map, stdout); - map__fprintf(pos, stdout); - } - - if (map->start <= pos->start && map->end > pos->start) - pos->start = map->end; - - if (map->end >= pos->end && map->start < pos->end) - pos->end = map->start; - - if (verbose >= 2) { - printf("after collision:\n"); - map__fprintf(pos, stdout); - } - - if (pos->start >= pos->end) { - list_del_init(&pos->node); - free(pos); - } - } - } - - list_add_tail(&map->node, &self->maps); -} - -static int thread__fork(struct thread *self, struct thread *parent) -{ - struct map *map; - - if (self->comm) - free(self->comm); - self->comm = strdup(parent->comm); - if (!self->comm) - return -ENOMEM; - - list_for_each_entry(map, &parent->maps, node) { - struct map *new = map__clone(map); - if (!new) - return -ENOMEM; - thread__insert_map(self, new); - } - - return 0; -} - -static struct map *thread__find_map(struct thread *self, u64 ip) -{ - struct map *pos; - - if (self == NULL) - return NULL; - - list_for_each_entry(pos, &self->maps, node) - if (ip >= pos->start && ip <= pos->end) - return pos; - - return NULL; -} - -static size_t threads__fprintf(FILE *fp) -{ - size_t ret = 0; - struct rb_node *nd; - - for (nd = rb_first(&threads); nd; nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread, rb_node); - - ret += thread__fprintf(pos, fp); - } - - return ret; -} - /* * histogram, sorted on item, collects counts */ @@ -1228,7 +1056,7 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) static void register_idle_thread(void) { - struct thread *thread = threads__findnew(0); + struct thread *thread = threads__findnew(0, &threads, &last_match); if (thread == NULL || thread__set_comm(thread, "[idle]")) { @@ -1263,7 +1091,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) char level; int show = 0; struct dso *dso = NULL; - struct thread *thread = threads__findnew(event->ip.pid); + struct thread *thread; u64 ip = event->ip.ip; u64 period = 1; struct map *map = NULL; @@ -1271,6 +1099,8 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) struct ip_callchain *chain = NULL; int cpumode; + thread = threads__findnew(event->ip.pid, &threads, &last_match); + if (sample_type & PERF_SAMPLE_PERIOD) { period = *(u64 *)more_data; more_data += sizeof(u64); @@ -1360,9 +1190,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread = threads__findnew(event->mmap.pid); + struct thread *thread; struct map *map = map__new(&event->mmap, cwd, cwdlen); + thread = threads__findnew(event->mmap.pid, &threads, &last_match); + dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), @@ -1387,7 +1219,9 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread = threads__findnew(event->comm.pid); + struct thread *thread; + + thread = threads__findnew(event->comm.pid, &threads, &last_match); dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", (void *)(offset + head), @@ -1407,8 +1241,11 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) static int process_task_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread = threads__findnew(event->fork.pid); - struct thread *parent = threads__findnew(event->fork.ppid); + struct thread *thread; + struct thread *parent; + + thread = threads__findnew(event->fork.pid, &threads, &last_match); + parent = threads__findnew(event->fork.ppid, &threads, &last_match); dprintf("%p [%p]: PERF_EVENT_%s: (%d:%d):(%d:%d)\n", (void *)(offset + head), @@ -1749,7 +1586,7 @@ static int __cmd_report(void) return 0; if (verbose >= 3) - threads__fprintf(stdout); + threads__fprintf(stdout, &threads); if (verbose >= 2) dsos__fprintf(stdout); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c new file mode 100644 index 000000000000..00c14b98d651 --- /dev/null +++ b/tools/perf/util/thread.c @@ -0,0 +1,143 @@ +#include "../perf.h" +#include +#include +#include +#include "thread.h" +#include "util.h" + +static struct thread *thread__new(pid_t pid) +{ + struct thread *self = malloc(sizeof(*self)); + + if (self != NULL) { + self->pid = pid; + self->comm = malloc(32); + if (self->comm) + snprintf(self->comm, 32, ":%d", self->pid); + INIT_LIST_HEAD(&self->maps); + } + + return self; +} + +int thread__set_comm(struct thread *self, const char *comm) +{ + if (self->comm) + free(self->comm); + self->comm = strdup(comm); + return self->comm ? 0 : -ENOMEM; +} + +static size_t thread__fprintf(struct thread *self, FILE *fp) +{ + struct map *pos; + size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); + + list_for_each_entry(pos, &self->maps, node) + ret += map__fprintf(pos, fp); + + return ret; +} + +struct thread * +threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) +{ + struct rb_node **p = &threads->rb_node; + struct rb_node *parent = NULL; + struct thread *th; + + /* + * Font-end cache - PID lookups come in blocks, + * so most of the time we dont have to look up + * the full rbtree: + */ + if (*last_match && (*last_match)->pid == pid) + return *last_match; + + while (*p != NULL) { + parent = *p; + th = rb_entry(parent, struct thread, rb_node); + + if (th->pid == pid) { + *last_match = th; + return th; + } + + if (pid < th->pid) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + th = thread__new(pid); + if (th != NULL) { + rb_link_node(&th->rb_node, parent, p); + rb_insert_color(&th->rb_node, threads); + *last_match = th; + } + + return th; +} + +void thread__insert_map(struct thread *self, struct map *map) +{ + struct map *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, &self->maps, node) { + if (map__overlap(pos, map)) { + list_del_init(&pos->node); + /* XXX leaks dsos */ + free(pos); + } + } + + list_add_tail(&map->node, &self->maps); +} + +int thread__fork(struct thread *self, struct thread *parent) +{ + struct map *map; + + if (self->comm) + free(self->comm); + self->comm = strdup(parent->comm); + if (!self->comm) + return -ENOMEM; + + list_for_each_entry(map, &parent->maps, node) { + struct map *new = map__clone(map); + if (!new) + return -ENOMEM; + thread__insert_map(self, new); + } + + return 0; +} + +struct map *thread__find_map(struct thread *self, u64 ip) +{ + struct map *pos; + + if (self == NULL) + return NULL; + + list_for_each_entry(pos, &self->maps, node) + if (ip >= pos->start && ip <= pos->end) + return pos; + + return NULL; +} + +size_t threads__fprintf(FILE *fp, struct rb_root *threads) +{ + size_t ret = 0; + struct rb_node *nd; + + for (nd = rb_first(threads); nd; nd = rb_next(nd)) { + struct thread *pos = rb_entry(nd, struct thread, rb_node); + + ret += thread__fprintf(pos, fp); + } + + return ret; +} diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h new file mode 100644 index 000000000000..b1c66719379b --- /dev/null +++ b/tools/perf/util/thread.h @@ -0,0 +1,19 @@ +#include +#include +#include +#include "symbol.h" + +struct thread { + struct rb_node rb_node; + struct list_head maps; + pid_t pid; + char *comm; +}; + +int thread__set_comm(struct thread *self, const char *comm); +struct thread * +threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match); +void thread__insert_map(struct thread *self, struct map *map); +int thread__fork(struct thread *self, struct thread *parent); +struct map *thread__find_map(struct thread *self, u64 ip); +size_t threads__fprintf(FILE *fp, struct rb_root *threads); From 83a0944fa919fb2ebcfc1f8933d86e437b597ca6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 15 Aug 2009 12:26:57 +0200 Subject: [PATCH 09/60] perf: Enable more compiler warnings Related to a shadowed variable bug fix Valdis Kletnieks noticed that perf does not get built with -Wshadow, which could have helped us avoid the bug. So enable -Wshadow and also enable the following warnings on perf builds, in addition to the already enabled -Wall -Wextra -std=gnu99 warnings: -Wcast-align -Wformat=2 -Wshadow -Winit-self -Wpacked -Wredundant-decls -Wstack-protector -Wstrict-aliasing=3 -Wswitch-default -Wswitch-enum -Wno-system-headers -Wundef -Wvolatile-register-var -Wwrite-strings -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement And change/fix the perf code to build cleanly under GCC 4.3.2. The list of warnings enablement is rather arbitrary: it's based on my (quick) reading of the GCC manpages and trying them on perf. I categorized the warnings based on individually enabling them and looking whether they trigger something in the perf build. If i liked those warnings (i.e. if they trigger for something that arguably could be improved) i enabled the warning. If the warnings seemed to come from language laywers spamming the build with tons of nuisance warnings i generally kept them off. Most of the sign conversion related warnings were in this category. (A second patch enabling some of the sign warnings might be welcome - sign bugs can be nasty.) I also kept warnings that seem to make sense from their manpage description and which produced no actual warnings on our code base. These warnings might still be turned off if they end up being a nuisance. I also left out a few warnings that are not supported in older compilers. [ Note that these changes might break the build on older compilers i did not test, or on non-x86 architectures that produce different warnings, so more testing would be welcome. ] Reported-by: Valdis.Kletnieks@vt.edu Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 7 ++- tools/perf/builtin-annotate.c | 32 +++++----- tools/perf/builtin-help.c | 1 + tools/perf/builtin-report.c | 38 ++++++------ tools/perf/builtin-top.c | 23 ++++--- tools/perf/util/abspath.c | 3 +- tools/perf/util/cache.h | 1 - tools/perf/util/callchain.c | 2 + tools/perf/util/color.c | 6 +- tools/perf/util/color.h | 2 +- tools/perf/util/config.c | 22 ++++--- tools/perf/util/exec_cmd.c | 1 - tools/perf/util/module.c | 4 +- tools/perf/util/parse-events.c | 26 ++++---- tools/perf/util/parse-events.h | 4 +- tools/perf/util/parse-options.c | 22 +++++++ tools/perf/util/path.c | 25 +++++--- tools/perf/util/run-command.c | 6 +- tools/perf/util/symbol.c | 104 ++++++++++++++++---------------- tools/perf/util/symbol.h | 4 +- tools/perf/util/values.c | 7 +-- tools/perf/util/values.h | 2 +- 22 files changed, 194 insertions(+), 148 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0056405e4c93..8608c06f806e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -166,7 +166,12 @@ endif # CFLAGS and LDFLAGS are for the users to override from the command line. -CFLAGS = $(M64) -ggdb3 -Wall -Wextra -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6 +# +# Include saner warnings here, which can catch bugs: +# +EXTRA_WARNINGS = -Wcast-align -Wformat=2 -Wshadow -Winit-self -Wpacked -Wredundant-decls -Wstack-protector -Wstrict-aliasing=3 -Wswitch-default -Wswitch-enum -Wno-system-headers -Wundef -Wvolatile-register-var -Wwrite-strings -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement + +CFLAGS = $(M64) -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 $(EXTRA_WARNINGS) LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 3bedaa5d21d2..32ff9838351e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -81,7 +81,7 @@ struct hist_entry { struct sort_entry { struct list_head list; - char *header; + const char *header; int64_t (*cmp)(struct hist_entry *, struct hist_entry *); int64_t (*collapse)(struct hist_entry *, struct hist_entry *); @@ -225,7 +225,7 @@ static struct sort_entry sort_sym = { static int sort__need_collapse = 0; struct sort_dimension { - char *name; + const char *name; struct sort_entry *entry; int taken; }; @@ -723,7 +723,7 @@ parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) const char *path = NULL; unsigned int hits = 0; double percent = 0.0; - char *color; + const char *color; struct sym_ext *sym_ext = sym->priv; offset = line_ip - start; @@ -805,7 +805,7 @@ static void free_source_line(struct symbol *sym, int len) /* Get the filename:line for the colored entries */ static void -get_source_line(struct symbol *sym, u64 start, int len, char *filename) +get_source_line(struct symbol *sym, u64 start, int len, const char *filename) { int i; char cmd[PATH_MAX * 2]; @@ -851,7 +851,7 @@ get_source_line(struct symbol *sym, u64 start, int len, char *filename) } } -static void print_summary(char *filename) +static void print_summary(const char *filename) { struct sym_ext *sym_ext; struct rb_node *node; @@ -867,7 +867,7 @@ static void print_summary(char *filename) node = rb_first(&root_sym_ext); while (node) { double percent; - char *color; + const char *color; char *path; sym_ext = rb_entry(node, struct sym_ext, node); @@ -882,7 +882,7 @@ static void print_summary(char *filename) static void annotate_sym(struct dso *dso, struct symbol *sym) { - char *filename = dso->name, *d_filename; + const char *filename = dso->name, *d_filename; u64 start, end, len; char command[PATH_MAX*2]; FILE *file; @@ -892,7 +892,7 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) if (sym->module) filename = sym->module->path; else if (dso == kernel_dso) - filename = vmlinux; + filename = vmlinux_name; start = sym->obj_start; if (!start) @@ -964,7 +964,7 @@ static int __cmd_annotate(void) int ret, rc = EXIT_FAILURE; unsigned long offset = 0; unsigned long head = 0; - struct stat stat; + struct stat input_stat; event_t *event; uint32_t size; char *buf; @@ -977,13 +977,13 @@ static int __cmd_annotate(void) exit(-1); } - ret = fstat(input, &stat); + ret = fstat(input, &input_stat); if (ret < 0) { perror("failed to stat file"); exit(-1); } - if (!stat.st_size) { + if (!input_stat.st_size) { fprintf(stderr, "zero-sized file, nothing to do!\n"); exit(0); } @@ -1010,10 +1010,10 @@ static int __cmd_annotate(void) if (head + event->header.size >= page_size * mmap_window) { unsigned long shift = page_size * (head / page_size); - int ret; + int munmap_ret; - ret = munmap(buf, page_size * mmap_window); - assert(ret == 0); + munmap_ret = munmap(buf, page_size * mmap_window); + assert(munmap_ret == 0); offset += shift; head -= shift; @@ -1049,7 +1049,7 @@ static int __cmd_annotate(void) head += size; - if (offset + head < (unsigned long)stat.st_size) + if (offset + head < (unsigned long)input_stat.st_size) goto more; rc = EXIT_SUCCESS; @@ -1092,7 +1092,7 @@ static const struct option options[] = { "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), - OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), + OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('l', "print-line", &print_line, diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 2599d86a733b..4fb8734a796e 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -456,6 +456,7 @@ int cmd_help(int argc, const char **argv, const char *prefix __used) break; case HELP_FORMAT_WEB: show_html_page(argv[0]); + default: break; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 298f26b8ac78..3b9d24dc0eda 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -97,6 +97,7 @@ static int repsep_fprintf(FILE *fp, const char *fmt, ...) n = vasprintf(&bf, fmt, ap); if (n > 0) { char *sep = bf; + while (1) { sep = strchr(sep, *field_sep); if (sep == NULL) @@ -144,7 +145,7 @@ struct hist_entry { struct sort_entry { struct list_head list; - char *header; + const char *header; int64_t (*cmp)(struct hist_entry *, struct hist_entry *); int64_t (*collapse)(struct hist_entry *, struct hist_entry *); @@ -328,7 +329,7 @@ static int sort__need_collapse = 0; static int sort__has_parent = 0; struct sort_dimension { - char *name; + const char *name; struct sort_entry *entry; int taken; }; @@ -343,7 +344,7 @@ static struct sort_dimension sort_dimensions[] = { static LIST_HEAD(hist_entry__sort_list); -static int sort_dimension__add(char *tok) +static int sort_dimension__add(const char *tok) { unsigned int i; @@ -602,6 +603,7 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, case CHAIN_GRAPH_REL: ret += callchain__fprintf_graph(fp, chain, total_samples, 1, 1); + case CHAIN_NONE: default: break; } @@ -1290,7 +1292,7 @@ process_lost_event(event_t *event, unsigned long offset, unsigned long head) static void trace_event(event_t *event) { unsigned char *raw_event = (void *)event; - char *color = PERF_COLOR_BLUE; + const char *color = PERF_COLOR_BLUE; int i, j; if (!dump_trace) @@ -1348,7 +1350,7 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head) struct perf_counter_attr *attr = perf_header__find_attr(event->read.id); if (show_threads) { - char *name = attr ? __event_name(attr->type, attr->config) + const char *name = attr ? __event_name(attr->type, attr->config) : "unknown"; perf_read_values_add_value(&show_threads_values, event->read.pid, event->read.tid, @@ -1411,19 +1413,19 @@ process_event(event_t *event, unsigned long offset, unsigned long head) static u64 perf_header__sample_type(void) { - u64 sample_type = 0; + u64 type = 0; int i; for (i = 0; i < header->attrs; i++) { struct perf_header_attr *attr = header->attr[i]; - if (!sample_type) - sample_type = attr->attr.sample_type; - else if (sample_type != attr->attr.sample_type) + if (!type) + type = attr->attr.sample_type; + else if (type != attr->attr.sample_type) die("non matching sample_type"); } - return sample_type; + return type; } static int __cmd_report(void) @@ -1431,7 +1433,7 @@ static int __cmd_report(void) int ret, rc = EXIT_FAILURE; unsigned long offset = 0; unsigned long head, shift; - struct stat stat; + struct stat input_stat; event_t *event; uint32_t size; char *buf; @@ -1450,13 +1452,13 @@ static int __cmd_report(void) exit(-1); } - ret = fstat(input, &stat); + ret = fstat(input, &input_stat); if (ret < 0) { perror("failed to stat file"); exit(-1); } - if (!stat.st_size) { + if (!input_stat.st_size) { fprintf(stderr, "zero-sized file, nothing to do!\n"); exit(0); } @@ -1524,12 +1526,12 @@ static int __cmd_report(void) size = 8; if (head + event->header.size >= page_size * mmap_window) { - int ret; + int munmap_ret; shift = page_size * (head / page_size); - ret = munmap(buf, page_size * mmap_window); - assert(ret == 0); + munmap_ret = munmap(buf, page_size * mmap_window); + assert(munmap_ret == 0); offset += shift; head -= shift; @@ -1568,7 +1570,7 @@ static int __cmd_report(void) if (offset + head >= header->data_offset + header->data_size) goto done; - if (offset + head < (unsigned long)stat.st_size) + if (offset + head < (unsigned long)input_stat.st_size) goto more; done: @@ -1666,7 +1668,7 @@ static const struct option options[] = { "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), - OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), + OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 9a6dbbff9a9f..06f763e4b35b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -120,7 +120,8 @@ static void parse_source(struct sym_entry *syme) struct module *module; struct section *section = NULL; FILE *file; - char command[PATH_MAX*2], *path = vmlinux; + char command[PATH_MAX*2]; + const char *path = vmlinux_name; u64 start, end, len; if (!syme) @@ -487,10 +488,12 @@ static void print_sym_table(void) ); for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { - struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node); - struct symbol *sym = (struct symbol *)(syme + 1); + struct symbol *sym; double pcnt; + syme = rb_entry(nd, struct sym_entry, rb_node); + sym = (struct symbol *)(syme + 1); + if (++printed > print_entries || (int)syme->snap_count < count_filter) continue; @@ -609,7 +612,7 @@ static void print_mapped_keys(void) fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); - if (vmlinux) { + if (vmlinux_name) { fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); @@ -638,7 +641,9 @@ static int key_mapped(int c) case 'F': case 's': case 'S': - return vmlinux ? 1 : 0; + return vmlinux_name ? 1 : 0; + default: + break; } return 0; @@ -724,6 +729,8 @@ static void handle_keypress(int c) case 'z': zero = ~zero; break; + default: + break; } } @@ -812,13 +819,13 @@ static int parse_symbols(void) { struct rb_node *node; struct symbol *sym; - int modules = vmlinux ? 1 : 0; + int use_modules = vmlinux_name ? 1 : 0; kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry)); if (kernel_dso == NULL) return -1; - if (dso__load_kernel(kernel_dso, vmlinux, symbol_filter, verbose, modules) <= 0) + if (dso__load_kernel(kernel_dso, vmlinux_name, symbol_filter, verbose, use_modules) <= 0) goto out_delete_dso; node = rb_first(&kernel_dso->syms); @@ -1114,7 +1121,7 @@ static const struct option options[] = { "system-wide collection from all CPUs"), OPT_INTEGER('C', "CPU", &profile_cpu, "CPU to profile on"), - OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), + OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), OPT_INTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), OPT_INTEGER('r', "realtime", &realtime_prio, diff --git a/tools/perf/util/abspath.c b/tools/perf/util/abspath.c index 61d33b81fc97..a791dd467261 100644 --- a/tools/perf/util/abspath.c +++ b/tools/perf/util/abspath.c @@ -50,7 +50,8 @@ const char *make_absolute_path(const char *path) die ("Could not get current working directory"); if (last_elem) { - int len = strlen(buf); + len = strlen(buf); + if (len + strlen(last_elem) + 2 > PATH_MAX) die ("Too long path name: '%s/%s'", buf, last_elem); diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 4b50c412b9c5..6f8ea9d210b6 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -52,7 +52,6 @@ extern const char *perf_mailmap_file; extern void maybe_flush_or_die(FILE *, const char *); extern int copy_fd(int ifd, int ofd); extern int copy_file(const char *dst, const char *src, int mode); -extern ssize_t read_in_full(int fd, void *buf, size_t count); extern ssize_t write_in_full(int fd, const void *buf, size_t count); extern void write_or_die(int fd, const void *buf, size_t count); extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 011473411642..3b8380f1b478 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -50,6 +50,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, else p = &(*p)->rb_right; break; + case CHAIN_NONE: default: break; } @@ -143,6 +144,7 @@ int register_callchain_param(struct callchain_param *param) case CHAIN_FLAT: param->sort = sort_chain_flat; break; + case CHAIN_NONE: default: return -1; } diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 90a044d1fe7d..e47fdeb85391 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -242,9 +242,9 @@ int color_fwrite_lines(FILE *fp, const char *color, return 0; } -char *get_percent_color(double percent) +const char *get_percent_color(double percent) { - char *color = PERF_COLOR_NORMAL; + const char *color = PERF_COLOR_NORMAL; /* * We color high-overhead entries in red, mid-overhead @@ -263,7 +263,7 @@ char *get_percent_color(double percent) int percent_color_fprintf(FILE *fp, const char *fmt, double percent) { int r; - char *color; + const char *color; color = get_percent_color(percent); r = color_fprintf(fp, color, fmt, percent); diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 706cec50bd25..43d0d1b67c45 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -36,6 +36,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); int percent_color_fprintf(FILE *fp, const char *fmt, double percent); -char *get_percent_color(double percent); +const char *get_percent_color(double percent); #endif /* COLOR_H */ diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 780df541006d..8784649109ce 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -160,17 +160,18 @@ static int get_extended_base_var(char *name, int baselen, int c) name[baselen++] = '.'; for (;;) { - int c = get_next_char(); - if (c == '\n') + int ch = get_next_char(); + + if (ch == '\n') return -1; - if (c == '"') + if (ch == '"') break; - if (c == '\\') { - c = get_next_char(); - if (c == '\n') + if (ch == '\\') { + ch = get_next_char(); + if (ch == '\n') return -1; } - name[baselen++] = c; + name[baselen++] = ch; if (baselen > MAXNAME / 2) return -1; } @@ -530,6 +531,8 @@ static int store_aux(const char* key, const char* value, void *cb __used) store.offset[store.seen] = ftell(config_file); } } + default: + break; } return 0; } @@ -619,6 +622,7 @@ static ssize_t find_beginning_of_line(const char* contents, size_t size, switch (contents[offset]) { case '=': equal_offset = offset; break; case ']': bracket_offset = offset; break; + default: break; } if (offset > 0 && contents[offset-1] == '\\') { offset_ = offset; @@ -742,9 +746,9 @@ int perf_config_set_multivar(const char* key, const char* value, goto write_err_out; } else { struct stat st; - char* contents; + char *contents; ssize_t contents_sz, copy_begin, copy_end; - int i, new_line = 0; + int new_line = 0; if (value_regex == NULL) store.value_regex = NULL; diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c index 34a352867382..2745605dba11 100644 --- a/tools/perf/util/exec_cmd.c +++ b/tools/perf/util/exec_cmd.c @@ -6,7 +6,6 @@ #define MAX_ARGS 32 -extern char **environ; static const char *argv_exec_path; static const char *argv0_path; diff --git a/tools/perf/util/module.c b/tools/perf/util/module.c index ddabe925d65d..3d567fe59c79 100644 --- a/tools/perf/util/module.c +++ b/tools/perf/util/module.c @@ -436,9 +436,9 @@ static int mod_dso__load_module_paths(struct mod_dso *self) goto out_failure; while (!feof(file)) { - char *path, *name, *tmp; + char *name, *tmp; struct module *module; - int line_len, len; + int line_len; line_len = getline(&line, &n, file); if (line_len < 0) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 044178408783..1cda97b39118 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -14,10 +14,10 @@ int nr_counters; struct perf_counter_attr attrs[MAX_COUNTERS]; struct event_symbol { - u8 type; - u64 config; - char *symbol; - char *alias; + u8 type; + u64 config; + const char *symbol; + const char *alias; }; char debugfs_path[MAXPATHLEN]; @@ -51,7 +51,7 @@ static struct event_symbol event_symbols[] = { #define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) #define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) -static char *hw_event_names[] = { +static const char *hw_event_names[] = { "cycles", "instructions", "cache-references", @@ -61,7 +61,7 @@ static char *hw_event_names[] = { "bus-cycles", }; -static char *sw_event_names[] = { +static const char *sw_event_names[] = { "cpu-clock-msecs", "task-clock-msecs", "page-faults", @@ -73,7 +73,7 @@ static char *sw_event_names[] = { #define MAX_ALIASES 8 -static char *hw_cache[][MAX_ALIASES] = { +static const char *hw_cache[][MAX_ALIASES] = { { "L1-dcache", "l1-d", "l1d", "L1-data", }, { "L1-icache", "l1-i", "l1i", "L1-instruction", }, { "LLC", "L2" }, @@ -82,13 +82,13 @@ static char *hw_cache[][MAX_ALIASES] = { { "branch", "branches", "bpu", "btb", "bpc", }, }; -static char *hw_cache_op[][MAX_ALIASES] = { +static const char *hw_cache_op[][MAX_ALIASES] = { { "load", "loads", "read", }, { "store", "stores", "write", }, { "prefetch", "prefetches", "speculative-read", "speculative-load", }, }; -static char *hw_cache_result[][MAX_ALIASES] = { +static const char *hw_cache_result[][MAX_ALIASES] = { { "refs", "Reference", "ops", "access", }, { "misses", "miss", }, }; @@ -158,7 +158,7 @@ int valid_debugfs_mount(const char *debugfs) return 0; } -static char *tracepoint_id_to_name(u64 config) +static const char *tracepoint_id_to_name(u64 config) { static char tracepoint_name[2 * MAX_EVENT_LENGTH]; DIR *sys_dir, *evt_dir; @@ -235,7 +235,7 @@ static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result) return name; } -char *event_name(int counter) +const char *event_name(int counter) { u64 config = attrs[counter].config; int type = attrs[counter].type; @@ -243,7 +243,7 @@ char *event_name(int counter) return __event_name(type, config); } -char *__event_name(int type, u64 config) +const char *__event_name(int type, u64 config) { static char buf[32]; @@ -294,7 +294,7 @@ char *__event_name(int type, u64 config) return "unknown"; } -static int parse_aliases(const char **str, char *names[][MAX_ALIASES], int size) +static int parse_aliases(const char **str, const char *names[][MAX_ALIASES], int size) { int i, j; int n, longest = -1; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 192a962e3a0f..9b1aeea01636 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -9,8 +9,8 @@ extern int nr_counters; extern struct perf_counter_attr attrs[MAX_COUNTERS]; -extern char *event_name(int ctr); -extern char *__event_name(int type, u64 config); +extern const char *event_name(int ctr); +extern const char *__event_name(int type, u64 config); extern int parse_events(const struct option *opt, const char *str, int unset); diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 1bf67190c820..6d8af48c925e 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -53,6 +53,12 @@ static int get_value(struct parse_opt_ctx_t *p, case OPTION_SET_INT: case OPTION_SET_PTR: return opterror(opt, "takes no value", flags); + case OPTION_END: + case OPTION_ARGUMENT: + case OPTION_GROUP: + case OPTION_STRING: + case OPTION_INTEGER: + case OPTION_LONG: default: break; } @@ -130,6 +136,9 @@ static int get_value(struct parse_opt_ctx_t *p, return opterror(opt, "expects a numerical value", flags); return 0; + case OPTION_END: + case OPTION_ARGUMENT: + case OPTION_GROUP: default: die("should not happen, someone must be hit on the forehead"); } @@ -296,6 +305,8 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, return parse_options_usage(usagestr, options); case -2: goto unknown; + default: + break; } if (ctx->opt) check_typos(arg + 1, options); @@ -314,6 +325,8 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, ctx->argv[0] = strdup(ctx->opt - 1); *(char *)ctx->argv[0] = '-'; goto unknown; + default: + break; } } continue; @@ -336,6 +349,8 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, return parse_options_usage(usagestr, options); case -2: goto unknown; + default: + break; } continue; unknown: @@ -456,6 +471,13 @@ int usage_with_options_internal(const char * const *usagestr, } break; default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */ + case OPTION_END: + case OPTION_GROUP: + case OPTION_BIT: + case OPTION_BOOLEAN: + case OPTION_SET_INT: + case OPTION_SET_PTR: + case OPTION_LONG: break; } diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index a501a40dd2cb..fd1f2faaade4 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -17,7 +17,7 @@ static char bad_path[] = "/bad-path/"; * Two hacks: */ -static char *get_perf_dir(void) +static const char *get_perf_dir(void) { return "."; } @@ -38,8 +38,9 @@ size_t strlcpy(char *dest, const char *src, size_t size) static char *get_pathname(void) { static char pathname_array[4][PATH_MAX]; - static int index; - return pathname_array[3 & ++index]; + static int idx; + + return pathname_array[3 & ++idx]; } static char *cleanup_path(char *path) @@ -161,20 +162,24 @@ int perf_mkstemp(char *path, size_t len, const char *template) } -const char *make_relative_path(const char *abs, const char *base) +const char *make_relative_path(const char *abs_path, const char *base) { static char buf[PATH_MAX + 1]; int baselen; + if (!base) - return abs; + return abs_path; + baselen = strlen(base); - if (prefixcmp(abs, base)) - return abs; - if (abs[baselen] == '/') + if (prefixcmp(abs_path, base)) + return abs_path; + if (abs_path[baselen] == '/') baselen++; else if (base[baselen - 1] != '/') - return abs; - strcpy(buf, abs + baselen); + return abs_path; + + strcpy(buf, abs_path + baselen); + return buf; } diff --git a/tools/perf/util/run-command.c b/tools/perf/util/run-command.c index a3935343091a..2b615acf94d7 100644 --- a/tools/perf/util/run-command.c +++ b/tools/perf/util/run-command.c @@ -262,7 +262,7 @@ int run_hook(const char *index_file, const char *name, ...) { struct child_process hook; const char **argv = NULL, *env[2]; - char index[PATH_MAX]; + char idx[PATH_MAX]; va_list args; int ret; size_t i = 0, alloc = 0; @@ -284,8 +284,8 @@ int run_hook(const char *index_file, const char *name, ...) hook.no_stdin = 1; hook.stdout_to_stderr = 1; if (index_file) { - snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file); - env[0] = index; + snprintf(idx, sizeof(idx), "PERF_INDEX_FILE=%s", index_file); + env[0] = idx; env[1] = NULL; hook.env = env; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0b9862351260..3159d47ae1cc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -21,7 +21,7 @@ enum dso_origin { static struct symbol *symbol__new(u64 start, u64 len, const char *name, unsigned int priv_size, - u64 obj_start, int verbose) + u64 obj_start, int v) { size_t namelen = strlen(name) + 1; struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); @@ -29,7 +29,7 @@ static struct symbol *symbol__new(u64 start, u64 len, if (!self) return NULL; - if (verbose >= 2) + if (v >= 2) printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n", (u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); @@ -156,7 +156,7 @@ size_t dso__fprintf(struct dso *self, FILE *fp) return ret; } -static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verbose) +static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int v) { struct rb_node *nd, *prevnd; char *line = NULL; @@ -198,7 +198,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb * Well fix up the end later, when we have all sorted. */ sym = symbol__new(start, 0xdead, line + len + 2, - self->sym_priv_size, 0, verbose); + self->sym_priv_size, 0, v); if (sym == NULL) goto out_delete_line; @@ -239,7 +239,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb return -1; } -static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verbose) +static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int v) { char *line = NULL; size_t n; @@ -277,7 +277,7 @@ static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verb continue; sym = symbol__new(start, size, line + len, - self->sym_priv_size, start, verbose); + self->sym_priv_size, start, v); if (sym == NULL) goto out_delete_line; @@ -305,13 +305,13 @@ static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verb * elf_symtab__for_each_symbol - iterate thru all the symbols * * @self: struct elf_symtab instance to iterate - * @index: uint32_t index + * @idx: uint32_t idx * @sym: GElf_Sym iterator */ -#define elf_symtab__for_each_symbol(syms, nr_syms, index, sym) \ - for (index = 0, gelf_getsym(syms, index, &sym);\ - index < nr_syms; \ - index++, gelf_getsym(syms, index, &sym)) +#define elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) \ + for (idx = 0, gelf_getsym(syms, idx, &sym);\ + idx < nr_syms; \ + idx++, gelf_getsym(syms, idx, &sym)) static inline uint8_t elf_sym__type(const GElf_Sym *sym) { @@ -354,7 +354,7 @@ static inline const char *elf_sym__name(const GElf_Sym *sym, static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, GElf_Shdr *shp, const char *name, - size_t *index) + size_t *idx) { Elf_Scn *sec = NULL; size_t cnt = 1; @@ -365,8 +365,8 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, gelf_getshdr(sec, shp); str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); if (!strcmp(name, str)) { - if (index) - *index = cnt; + if (idx) + *idx = cnt; break; } ++cnt; @@ -392,7 +392,7 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, * And always look at the original dso, not at debuginfo packages, that * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). */ -static int dso__synthesize_plt_symbols(struct dso *self, int verbose) +static int dso__synthesize_plt_symbols(struct dso *self, int v) { uint32_t nr_rel_entries, idx; GElf_Sym sym; @@ -442,7 +442,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, int verbose) goto out_elf_end; /* - * Fetch the relocation section to find the indexes to the GOT + * Fetch the relocation section to find the idxes to the GOT * and the symbols in the .dynsym they refer to. */ reldata = elf_getdata(scn_plt_rel, NULL); @@ -476,7 +476,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, int verbose) "%s@plt", elf_sym__name(&sym, symstrs)); f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size, 0, verbose); + sympltname, self->sym_priv_size, 0, v); if (!f) goto out_elf_end; @@ -494,7 +494,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, int verbose) "%s@plt", elf_sym__name(&sym, symstrs)); f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size, 0, verbose); + sympltname, self->sym_priv_size, 0, v); if (!f) goto out_elf_end; @@ -518,12 +518,12 @@ static int dso__synthesize_plt_symbols(struct dso *self, int verbose) } static int dso__load_sym(struct dso *self, int fd, const char *name, - symbol_filter_t filter, int verbose, struct module *mod) + symbol_filter_t filter, int v, struct module *mod) { Elf_Data *symstrs, *secstrs; uint32_t nr_syms; int err = -1; - uint32_t index; + uint32_t idx; GElf_Ehdr ehdr; GElf_Shdr shdr; Elf_Data *syms; @@ -534,14 +534,14 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); if (elf == NULL) { - if (verbose) + if (v) fprintf(stderr, "%s: cannot read %s ELF file.\n", __func__, name); goto out_close; } if (gelf_getehdr(elf, &ehdr) == NULL) { - if (verbose) + if (v) fprintf(stderr, "%s: cannot get elf header.\n", __func__); goto out_elf_end; } @@ -583,9 +583,9 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, NULL) != NULL); } else self->adjust_symbols = 0; - elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { + elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { struct symbol *f; - const char *name; + const char *elf_name; char *demangled; u64 obj_start; struct section *section = NULL; @@ -608,7 +608,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, obj_start = sym.st_value; if (self->adjust_symbols) { - if (verbose >= 2) + if (v >= 2) printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); @@ -630,13 +630,13 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, * DWARF DW_compile_unit has this, but we don't always have access * to it... */ - name = elf_sym__name(&sym, symstrs); - demangled = bfd_demangle(NULL, name, DMGL_PARAMS | DMGL_ANSI); + elf_name = elf_sym__name(&sym, symstrs); + demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); if (demangled != NULL) - name = demangled; + elf_name = demangled; - f = symbol__new(sym.st_value, sym.st_size, name, - self->sym_priv_size, obj_start, verbose); + f = symbol__new(sym.st_value, sym.st_size, elf_name, + self->sym_priv_size, obj_start, v); free(demangled); if (!f) goto out_elf_end; @@ -659,7 +659,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, #define BUILD_ID_SIZE 128 -static char *dso__read_build_id(struct dso *self, int verbose) +static char *dso__read_build_id(struct dso *self, int v) { int i; GElf_Ehdr ehdr; @@ -676,14 +676,14 @@ static char *dso__read_build_id(struct dso *self, int verbose) elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); if (elf == NULL) { - if (verbose) + if (v) fprintf(stderr, "%s: cannot read %s ELF file.\n", __func__, self->name); goto out_close; } if (gelf_getehdr(elf, &ehdr) == NULL) { - if (verbose) + if (v) fprintf(stderr, "%s: cannot get elf header.\n", __func__); goto out_elf_end; } @@ -706,7 +706,7 @@ static char *dso__read_build_id(struct dso *self, int verbose) ++raw; bid += 2; } - if (verbose >= 2) + if (v >= 2) printf("%s(%s): %s\n", __func__, self->name, build_id); out_elf_end: elf_end(elf); @@ -732,7 +732,7 @@ char dso__symtab_origin(const struct dso *self) return origin[self->origin]; } -int dso__load(struct dso *self, symbol_filter_t filter, int verbose) +int dso__load(struct dso *self, symbol_filter_t filter, int v) { int size = PATH_MAX; char *name = malloc(size), *build_id = NULL; @@ -745,7 +745,7 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) self->adjust_symbols = 0; if (strncmp(self->name, "/tmp/perf-", 10) == 0) { - ret = dso__load_perf_map(self, filter, verbose); + ret = dso__load_perf_map(self, filter, v); self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT : DSO__ORIG_NOT_FOUND; return ret; @@ -764,7 +764,7 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) snprintf(name, size, "/usr/lib/debug%s", self->name); break; case DSO__ORIG_BUILDID: - build_id = dso__read_build_id(self, verbose); + build_id = dso__read_build_id(self, v); if (build_id != NULL) { snprintf(name, size, "/usr/lib/debug/.build-id/%.2s/%s.debug", @@ -785,7 +785,7 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) fd = open(name, O_RDONLY); } while (fd < 0); - ret = dso__load_sym(self, fd, name, filter, verbose, NULL); + ret = dso__load_sym(self, fd, name, filter, v, NULL); close(fd); /* @@ -795,7 +795,7 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) goto more; if (ret > 0) { - int nr_plt = dso__synthesize_plt_symbols(self, verbose); + int nr_plt = dso__synthesize_plt_symbols(self, v); if (nr_plt > 0) ret += nr_plt; } @@ -807,7 +807,7 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) } static int dso__load_module(struct dso *self, struct mod_dso *mods, const char *name, - symbol_filter_t filter, int verbose) + symbol_filter_t filter, int v) { struct module *mod = mod_dso__find_module(mods, name); int err = 0, fd; @@ -820,13 +820,13 @@ static int dso__load_module(struct dso *self, struct mod_dso *mods, const char * if (fd < 0) return err; - err = dso__load_sym(self, fd, name, filter, verbose, mod); + err = dso__load_sym(self, fd, name, filter, v, mod); close(fd); return err; } -int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose) +int dso__load_modules(struct dso *self, symbol_filter_t filter, int v) { struct mod_dso *mods = mod_dso__new_dso("modules"); struct module *pos; @@ -844,7 +844,7 @@ int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose) next = rb_first(&mods->mods); while (next) { pos = rb_entry(next, struct module, rb_node); - err = dso__load_module(self, mods, pos->name, filter, verbose); + err = dso__load_module(self, mods, pos->name, filter, v); if (err < 0) break; @@ -887,14 +887,14 @@ static inline void dso__fill_symbol_holes(struct dso *self) } static int dso__load_vmlinux(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int verbose) + symbol_filter_t filter, int v) { int err, fd = open(vmlinux, O_RDONLY); if (fd < 0) return -1; - err = dso__load_sym(self, fd, vmlinux, filter, verbose, NULL); + err = dso__load_sym(self, fd, vmlinux, filter, v, NULL); if (err > 0) dso__fill_symbol_holes(self); @@ -905,18 +905,18 @@ static int dso__load_vmlinux(struct dso *self, const char *vmlinux, } int dso__load_kernel(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int verbose, int modules) + symbol_filter_t filter, int v, int use_modules) { int err = -1; if (vmlinux) { - err = dso__load_vmlinux(self, vmlinux, filter, verbose); - if (err > 0 && modules) - err = dso__load_modules(self, filter, verbose); + err = dso__load_vmlinux(self, vmlinux, filter, v); + if (err > 0 && use_modules) + err = dso__load_modules(self, filter, v); } if (err <= 0) - err = dso__load_kallsyms(self, filter, verbose); + err = dso__load_kallsyms(self, filter, v); if (err > 0) self->origin = DSO__ORIG_KERNEL; @@ -929,7 +929,7 @@ struct dso *kernel_dso; struct dso *vdso; struct dso *hypervisor_dso; -char *vmlinux = "vmlinux"; +const char *vmlinux_name = "vmlinux"; int modules; static void dsos__add(struct dso *dso) @@ -997,7 +997,7 @@ int load_kernel(void) if (!kernel_dso) return -1; - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, modules); + err = dso__load_kernel(kernel_dso, vmlinux_name, NULL, verbose, modules); if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 48b8e5759af9..6e8490716408 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -55,7 +55,7 @@ struct dso { char name[0]; }; -const char *sym_hist_filter; +extern const char *sym_hist_filter; typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym); @@ -87,6 +87,6 @@ extern struct list_head dsos; extern struct dso *kernel_dso; extern struct dso *vdso; extern struct dso *hypervisor_dso; -extern char *vmlinux; +extern const char *vmlinux_name; extern int modules; #endif /* _PERF_SYMBOL_ */ diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 614cfaf4712a..1c15e39f99e3 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -96,7 +96,7 @@ static void perf_read_values__enlarge_counters(struct perf_read_values *values) } static int perf_read_values__findnew_counter(struct perf_read_values *values, - u64 rawid, char *name) + u64 rawid, const char *name) { int i; @@ -116,7 +116,7 @@ static int perf_read_values__findnew_counter(struct perf_read_values *values, void perf_read_values_add_value(struct perf_read_values *values, u32 pid, u32 tid, - u64 rawid, char *name, u64 value) + u64 rawid, const char *name, u64 value) { int tindex, cindex; @@ -221,8 +221,7 @@ static void perf_read_values__display_raw(FILE *fp, countwidth, values->value[i][j]); } -void perf_read_values_display(FILE *fp, struct perf_read_values *values, - int raw) +void perf_read_values_display(FILE *fp, struct perf_read_values *values, int raw) { if (raw) perf_read_values__display_raw(fp, values); diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h index f8960fde0547..cadf8cf2a590 100644 --- a/tools/perf/util/values.h +++ b/tools/perf/util/values.h @@ -19,7 +19,7 @@ void perf_read_values_destroy(struct perf_read_values *values); void perf_read_values_add_value(struct perf_read_values *values, u32 pid, u32 tid, - u64 rawid, char *name, u64 value); + u64 rawid, const char *name, u64 value); void perf_read_values_display(FILE *fp, struct perf_read_values *values, int raw); From 35ba15b737e2cd1d780943189f2138519f81fd42 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 16 Aug 2009 11:09:21 +0200 Subject: [PATCH 10/60] perf: Build with stack-protector and with -D_FORTIFY_SOURCE=2 Up our defences a bit. Suggested-by: Arjan van de Ven Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8608c06f806e..d637aeadc166 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -171,7 +171,7 @@ endif # EXTRA_WARNINGS = -Wcast-align -Wformat=2 -Wshadow -Winit-self -Wpacked -Wredundant-decls -Wstack-protector -Wstrict-aliasing=3 -Wswitch-default -Wswitch-enum -Wno-system-headers -Wundef -Wvolatile-register-var -Wwrite-strings -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement -CFLAGS = $(M64) -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 $(EXTRA_WARNINGS) +CFLAGS = $(M64) -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -fstack-protector-all -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) From 0d31b82dd5c54a0b1e1d789427abdcc180bc4602 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Aug 2009 17:52:07 +0200 Subject: [PATCH 11/60] perf tools: Substract -Wformat-nonliteral from Wformat=2 in extra flags The soon coming perf trace needs to use printf with dynamically built formats. But we are using -Wformat=2 which is a shortcut for the following set: -Wformat -Wformat-security -Wformat-y2k -Wformat-nonliteral -Wformat-nonliteral warns when it can't check formats because they are not builtin constant strings, but we want to feature dynamic formats. What we want instead is Wformat=2 minus -Wformat-nonliteral, which is what this patch does. Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1250437927-25490-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d637aeadc166..5d54ddb83ab1 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -169,7 +169,7 @@ endif # # Include saner warnings here, which can catch bugs: # -EXTRA_WARNINGS = -Wcast-align -Wformat=2 -Wshadow -Winit-self -Wpacked -Wredundant-decls -Wstack-protector -Wstrict-aliasing=3 -Wswitch-default -Wswitch-enum -Wno-system-headers -Wundef -Wvolatile-register-var -Wwrite-strings -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement +EXTRA_WARNINGS = -Wcast-align -Wformat -Wformat-security -Wformat-y2k -Wshadow -Winit-self -Wpacked -Wredundant-decls -Wstack-protector -Wstrict-aliasing=3 -Wswitch-default -Wswitch-enum -Wno-system-headers -Wundef -Wvolatile-register-var -Wwrite-strings -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement CFLAGS = $(M64) -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -fstack-protector-all -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) LDFLAGS = -lpthread -lrt -lelf -lm From 2cec19d9d0716f8d68f1c5a87667d0387d4d252d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Aug 2009 19:24:21 +0200 Subject: [PATCH 12/60] perf tools: Factorize the dprintf definition We have two users of dprintf: report and annotate. Another one is coming with perf trace. Then factorize it into the debug file. While at it, rename dprintf() to dump_printf() so that it doesn't conflicts with its libc homograph. Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1250443461-28130-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 40 +++++++++++-------------- tools/perf/builtin-report.c | 56 +++++++++++++++++------------------ tools/perf/util/debug.c | 15 ++++++++++ tools/perf/util/debug.h | 2 ++ 4 files changed, 62 insertions(+), 51 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 32ff9838351e..820e7ccec62d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -34,10 +34,6 @@ static char *sort_order = default_sort_order; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; -static int dump_trace = 0; -#define dprintf(x...) do { if (dump_trace) printf(x); } while (0) - - static int full_paths; static int print_line; @@ -507,14 +503,14 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->ip.pid, &threads, &last_match); - dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", + dump_printf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, event->ip.pid, (void *)(long)ip); - dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); if (thread == NULL) { fprintf(stderr, "problem processing %d event, skipping it.\n", @@ -528,7 +524,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dso = kernel_dso; - dprintf(" ...... dso: %s\n", dso->name); + dump_printf(" ...... dso: %s\n", dso->name); } else if (event->header.misc & PERF_EVENT_MISC_USER) { @@ -549,12 +545,12 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if ((long long)ip < 0) dso = kernel_dso; } - dprintf(" ...... dso: %s\n", dso ? dso->name : ""); + dump_printf(" ...... dso: %s\n", dso ? dso->name : ""); } else { show = SHOW_HV; level = 'H'; - dprintf(" ...... dso: [hypervisor]\n"); + dump_printf(" ...... dso: [hypervisor]\n"); } if (show & show_mask) { @@ -582,7 +578,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->mmap.pid, &threads, &last_match); - dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", + dump_printf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), event->mmap.pid, @@ -592,7 +588,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) event->mmap.filename); if (thread == NULL || map == NULL) { - dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n"); + dump_printf("problem processing PERF_EVENT_MMAP, skipping event.\n"); return 0; } @@ -608,14 +604,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) struct thread *thread; thread = threads__findnew(event->comm.pid, &threads, &last_match); - dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { - dprintf("problem processing PERF_EVENT_COMM, skipping event.\n"); + dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); return -1; } total_comm++; @@ -631,13 +627,13 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->fork.pid, &threads, &last_match); parent = threads__findnew(event->fork.ppid, &threads, &last_match); - dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", + dump_printf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->fork.pid, event->fork.ppid); if (!thread || !parent || thread__fork(thread, parent)) { - dprintf("problem processing PERF_EVENT_FORK, skipping event.\n"); + dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n"); return -1; } total_fork++; @@ -1022,14 +1018,14 @@ static int __cmd_annotate(void) size = event->header.size; - dprintf("%p [%p]: event: %d\n", + dump_printf("%p [%p]: event: %d\n", (void *)(offset + head), (void *)(long)event->header.size, event->header.type); if (!size || process_event(event, offset, head) < 0) { - dprintf("%p [%p]: skipping unknown header type: %d\n", + dump_printf("%p [%p]: skipping unknown header type: %d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.type); @@ -1055,11 +1051,11 @@ static int __cmd_annotate(void) rc = EXIT_SUCCESS; close(input); - dprintf(" IP events: %10ld\n", total); - dprintf(" mmap events: %10ld\n", total_mmap); - dprintf(" comm events: %10ld\n", total_comm); - dprintf(" fork events: %10ld\n", total_fork); - dprintf(" unknown events: %10ld\n", total_unknown); + dump_printf(" IP events: %10ld\n", total); + dump_printf(" mmap events: %10ld\n", total_mmap); + dump_printf(" comm events: %10ld\n", total_comm); + dump_printf(" fork events: %10ld\n", total_fork); + dump_printf(" unknown events: %10ld\n", total_unknown); if (dump_trace) return 0; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3b9d24dc0eda..e104ed3c8418 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -43,8 +43,6 @@ static char *field_sep; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; -static int dump_trace = 0; -#define dprintf(x...) do { if (dump_trace) printf(x); } while (0) #define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0) static int full_paths; @@ -713,8 +711,8 @@ resolve_symbol(struct thread *thread, struct map **mapp, if ((long long)ip < 0) dso = kernel_dso; } - dprintf(" ...... dso: %s\n", dso ? dso->name : ""); - dprintf(" ...... map: %Lx -> %Lx\n", *ipp, ip); + dump_printf(" ...... dso: %s\n", dso ? dso->name : ""); + dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip); *ipp = ip; if (dsop) @@ -1108,7 +1106,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", + dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -1121,7 +1119,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) chain = (void *)more_data; - dprintf("... chain: nr:%Lu\n", chain->nr); + dump_printf("... chain: nr:%Lu\n", chain->nr); if (validate_chain(chain, event) < 0) { eprintf("call-chain problem with event, skipping it.\n"); @@ -1130,11 +1128,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (dump_trace) { for (i = 0; i < chain->nr; i++) - dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); + dump_printf("..... %2d: %016Lx\n", i, chain->ips[i]); } } - dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); if (thread == NULL) { eprintf("problem processing %d event, skipping it.\n", @@ -1153,7 +1151,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dso = kernel_dso; - dprintf(" ...... dso: %s\n", dso->name); + dump_printf(" ...... dso: %s\n", dso->name); } else if (cpumode == PERF_EVENT_MISC_USER) { @@ -1166,7 +1164,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dso = hypervisor_dso; - dprintf(" ...... dso: [hypervisor]\n"); + dump_printf(" ...... dso: [hypervisor]\n"); } if (show & show_mask) { @@ -1197,7 +1195,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->mmap.pid, &threads, &last_match); - dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n", + dump_printf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), event->mmap.pid, @@ -1208,7 +1206,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) event->mmap.filename); if (thread == NULL || map == NULL) { - dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n"); + dump_printf("problem processing PERF_EVENT_MMAP, skipping event.\n"); return 0; } @@ -1225,14 +1223,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->comm.pid, &threads, &last_match); - dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { - dprintf("problem processing PERF_EVENT_COMM, skipping event.\n"); + dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); return -1; } total_comm++; @@ -1249,7 +1247,7 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->fork.pid, &threads, &last_match); parent = threads__findnew(event->fork.ppid, &threads, &last_match); - dprintf("%p [%p]: PERF_EVENT_%s: (%d:%d):(%d:%d)\n", + dump_printf("%p [%p]: PERF_EVENT_%s: (%d:%d):(%d:%d)\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.type == PERF_EVENT_FORK ? "FORK" : "EXIT", @@ -1267,7 +1265,7 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) return 0; if (!thread || !parent || thread__fork(thread, parent)) { - dprintf("problem processing PERF_EVENT_FORK, skipping event.\n"); + dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n"); return -1; } total_fork++; @@ -1278,7 +1276,7 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) static int process_lost_event(event_t *event, unsigned long offset, unsigned long head) { - dprintf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n", + dump_printf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->lost.id, @@ -1298,12 +1296,12 @@ static void trace_event(event_t *event) if (!dump_trace) return; - dprintf("."); + dump_printf("."); cdprintf("\n. ... raw event: size %d bytes\n", event->header.size); for (i = 0; i < event->header.size; i++) { if ((i & 15) == 0) { - dprintf("."); + dump_printf("."); cdprintf(" %04x: ", i); } @@ -1322,7 +1320,7 @@ static void trace_event(event_t *event) cdprintf("\n"); } } - dprintf(".\n"); + dump_printf(".\n"); } static struct perf_header *header; @@ -1359,7 +1357,7 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head) event->read.value); } - dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", + dump_printf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", (void *)(offset + head), (void *)(long)(event->header.size), event->read.pid, @@ -1540,14 +1538,14 @@ static int __cmd_report(void) size = event->header.size; - dprintf("\n%p [%p]: event: %d\n", + dump_printf("\n%p [%p]: event: %d\n", (void *)(offset + head), (void *)(long)event->header.size, event->header.type); if (!size || process_event(event, offset, head) < 0) { - dprintf("%p [%p]: skipping unknown header type: %d\n", + dump_printf("%p [%p]: skipping unknown header type: %d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.type); @@ -1577,12 +1575,12 @@ static int __cmd_report(void) rc = EXIT_SUCCESS; close(input); - dprintf(" IP events: %10ld\n", total); - dprintf(" mmap events: %10ld\n", total_mmap); - dprintf(" comm events: %10ld\n", total_comm); - dprintf(" fork events: %10ld\n", total_fork); - dprintf(" lost events: %10ld\n", total_lost); - dprintf(" unknown events: %10ld\n", total_unknown); + dump_printf(" IP events: %10ld\n", total); + dump_printf(" mmap events: %10ld\n", total_mmap); + dump_printf(" comm events: %10ld\n", total_comm); + dump_printf(" fork events: %10ld\n", total_fork); + dump_printf(" lost events: %10ld\n", total_lost); + dump_printf(" unknown events: %10ld\n", total_unknown); if (dump_trace) return 0; diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 7cb8464abe61..8318fdee4778 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -6,6 +6,7 @@ #include int verbose = 0; +int dump_trace = 0; int eprintf(const char *fmt, ...) { @@ -20,3 +21,17 @@ int eprintf(const char *fmt, ...) return ret; } + +int dump_printf(const char *fmt, ...) +{ + va_list args; + int ret = 0; + + if (dump_trace) { + va_start(args, fmt); + ret = vprintf(fmt, args); + va_end(args); + } + + return ret; +} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 2ae9090108d3..a683bd571f1c 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -1,5 +1,7 @@ /* For debugging general purposes */ extern int verbose; +extern int dump_trace; int eprintf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); From 0f25bfc8d8b31a4ac8e4ff6cbc3911fb7092a4a7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Aug 2009 19:56:54 +0200 Subject: [PATCH 13/60] perf tools: Put the show mode into the event headers files Annotate and report share the same flags to filter events considering their context (kernel, user, hypervisor). Both tools have their own definitions of these flags. Factorize them out into the event headers file. Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1250445414-29237-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 4 ---- tools/perf/builtin-report.c | 4 ---- tools/perf/util/event.h | 6 ++++++ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 820e7ccec62d..6d751516616d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -22,10 +22,6 @@ #include "util/parse-events.h" #include "util/thread.h" -#define SHOW_KERNEL 1 -#define SHOW_USER 2 -#define SHOW_HV 4 - static char const *input_name = "perf.data"; static char default_sort_order[] = "comm,symbol"; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e104ed3c8418..05d52ff4c33d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -27,10 +27,6 @@ #include "util/thread.h" -#define SHOW_KERNEL 1 -#define SHOW_USER 2 -#define SHOW_HV 4 - static char const *input_name = "perf.data"; static char default_sort_order[] = "comm,dso,symbol"; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index fa7c50b654e1..fa2d4e91d329 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -4,6 +4,12 @@ #include "util.h" #include +enum { + SHOW_KERNEL = 1, + SHOW_USER = 2, + SHOW_HV = 4, +}; + /* * PERF_SAMPLE_IP | PERF_SAMPLE_TID | * */ From 0d3a5c885971de1e3124d85bfadf818abac9ba12 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Aug 2009 20:56:37 +0200 Subject: [PATCH 14/60] perf tools: Librarize sample type and attr finding from headers Librarize the sample type and attr fetching from perf data file headers so that we can also use it from perf trace. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1250448997-30715-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 44 +++++-------------------------------- tools/perf/util/header.c | 35 +++++++++++++++++++++++++++++ tools/perf/util/header.h | 4 ++++ 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 05d52ff4c33d..c6326deb1636 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -70,6 +70,8 @@ static int cwdlen; static struct rb_root threads; static struct thread *last_match; +static struct perf_header *header; + static struct callchain_param callchain_param = { .mode = CHAIN_GRAPH_REL, @@ -1319,29 +1321,12 @@ static void trace_event(event_t *event) dump_printf(".\n"); } -static struct perf_header *header; - -static struct perf_counter_attr *perf_header__find_attr(u64 id) -{ - int i; - - for (i = 0; i < header->attrs; i++) { - struct perf_header_attr *attr = header->attr[i]; - int j; - - for (j = 0; j < attr->ids; j++) { - if (attr->id[j] == id) - return &attr->attr; - } - } - - return NULL; -} - static int process_read_event(event_t *event, unsigned long offset, unsigned long head) { - struct perf_counter_attr *attr = perf_header__find_attr(event->read.id); + struct perf_counter_attr *attr; + + attr = perf_header__find_attr(event->read.id, header); if (show_threads) { const char *name = attr ? __event_name(attr->type, attr->config) @@ -1405,23 +1390,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static u64 perf_header__sample_type(void) -{ - u64 type = 0; - int i; - - for (i = 0; i < header->attrs; i++) { - struct perf_header_attr *attr = header->attr[i]; - - if (!type) - type = attr->attr.sample_type; - else if (type != attr->attr.sample_type) - die("non matching sample_type"); - } - - return type; -} - static int __cmd_report(void) { int ret, rc = EXIT_FAILURE; @@ -1460,7 +1428,7 @@ static int __cmd_report(void) header = perf_header__read(input); head = header->data_offset; - sample_type = perf_header__sample_type(); + sample_type = perf_header__sample_type(header); if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b92a457ca32e..a37a2221a0c3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -243,3 +243,38 @@ struct perf_header *perf_header__read(int fd) return self; } + +u64 perf_header__sample_type(struct perf_header *header) +{ + u64 type = 0; + int i; + + for (i = 0; i < header->attrs; i++) { + struct perf_header_attr *attr = header->attr[i]; + + if (!type) + type = attr->attr.sample_type; + else if (type != attr->attr.sample_type) + die("non matching sample_type"); + } + + return type; +} + +struct perf_counter_attr * +perf_header__find_attr(u64 id, struct perf_header *header) +{ + int i; + + for (i = 0; i < header->attrs; i++) { + struct perf_header_attr *attr = header->attr[i]; + int j; + + for (j = 0; j < attr->ids; j++) { + if (attr->id[j] == id) + return &attr->attr; + } + } + + return NULL; +} diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index bf280449fcfd..5d0a72ecc919 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -31,6 +31,10 @@ struct perf_header_attr * perf_header_attr__new(struct perf_counter_attr *attr); void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); +u64 perf_header__sample_type(struct perf_header *header); +struct perf_counter_attr * +perf_header__find_attr(u64 id, struct perf_header *header); + struct perf_header *perf_header__new(void); From 8f28827a162fd1e8da4e96bed69b06d2606e8322 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Aug 2009 22:05:48 +0200 Subject: [PATCH 15/60] perf tools: Librarize trace_event() helper Librarize trace_event() helper so that perf trace can use it too. Also clean up the debug.h includes a bit. It's not good to have it included in perf.h because it doesn't make it flexible against other headers it may need (headers that can also depend on perf.h and then create a recursive header dependency). Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1250453149-664-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 1 + tools/perf/builtin-record.c | 1 + tools/perf/builtin-report.c | 39 +---------------------- tools/perf/builtin-stat.c | 2 ++ tools/perf/builtin-top.c | 2 ++ tools/perf/perf.h | 1 - tools/perf/util/color.c | 10 ++++-- tools/perf/util/color.h | 1 + tools/perf/util/debug.c | 58 +++++++++++++++++++++++++++++++++++ tools/perf/util/debug.h | 1 + tools/perf/util/symbol.c | 2 ++ 11 files changed, 76 insertions(+), 42 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6d751516616d..96d421f7161d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -17,6 +17,7 @@ #include "util/string.h" #include "perf.h" +#include "util/debug.h" #include "util/parse-options.h" #include "util/parse-events.h" diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 65b4115e417d..6a5db675ee4f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -16,6 +16,7 @@ #include "util/header.h" #include "util/event.h" +#include "util/debug.h" #include #include diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c6326deb1636..1e3ad22d53dc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -20,6 +20,7 @@ #include "util/values.h" #include "perf.h" +#include "util/debug.h" #include "util/header.h" #include "util/parse-options.h" @@ -39,8 +40,6 @@ static char *field_sep; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; -#define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0) - static int full_paths; static int show_nr_samples; @@ -1285,42 +1284,6 @@ process_lost_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static void trace_event(event_t *event) -{ - unsigned char *raw_event = (void *)event; - const char *color = PERF_COLOR_BLUE; - int i, j; - - if (!dump_trace) - return; - - dump_printf("."); - cdprintf("\n. ... raw event: size %d bytes\n", event->header.size); - - for (i = 0; i < event->header.size; i++) { - if ((i & 15) == 0) { - dump_printf("."); - cdprintf(" %04x: ", i); - } - - cdprintf(" %02x", raw_event[i]); - - if (((i & 15) == 15) || i == event->header.size-1) { - cdprintf(" "); - for (j = 0; j < 15-(i & 15); j++) - cdprintf(" "); - for (j = 0; j < (i & 15); j++) { - if (isprint(raw_event[i-15+j])) - cdprintf("%c", raw_event[i-15+j]); - else - cdprintf("."); - } - cdprintf("\n"); - } - } - dump_printf(".\n"); -} - static int process_read_event(event_t *event, unsigned long offset, unsigned long head) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 4b9dd4af61a6..1a2626230660 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -42,6 +42,8 @@ #include "util/util.h" #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/event.h" +#include "util/debug.h" #include #include diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 06f763e4b35b..62b55ecab2c6 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -27,6 +27,8 @@ #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/debug.h" + #include #include diff --git a/tools/perf/perf.h b/tools/perf/perf.h index f5509213f030..e5148e2b6134 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -48,7 +48,6 @@ #include "../../include/linux/perf_counter.h" #include "util/types.h" -#include "util/debug.h" /* * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index e47fdeb85391..e88bca55a599 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -166,7 +166,7 @@ int perf_color_default_config(const char *var, const char *value, void *cb) return perf_default_config(var, value, cb); } -static int color_vfprintf(FILE *fp, const char *color, const char *fmt, +static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args, const char *trail) { int r = 0; @@ -191,6 +191,10 @@ static int color_vfprintf(FILE *fp, const char *color, const char *fmt, return r; } +int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args) +{ + return __color_vfprintf(fp, color, fmt, args, NULL); +} int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) @@ -199,7 +203,7 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) int r; va_start(args, fmt); - r = color_vfprintf(fp, color, fmt, args, NULL); + r = color_vfprintf(fp, color, fmt, args); va_end(args); return r; } @@ -209,7 +213,7 @@ int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...) va_list args; int r; va_start(args, fmt); - r = color_vfprintf(fp, color, fmt, args, "\n"); + r = __color_vfprintf(fp, color, fmt, args, "\n"); va_end(args); return r; } diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 43d0d1b67c45..58d597564b99 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -32,6 +32,7 @@ int perf_color_default_config(const char *var, const char *value, void *cb); int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty); void color_parse(const char *value, const char *var, char *dst); void color_parse_mem(const char *value, int len, const char *var, char *dst); +int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 8318fdee4778..e8ca98fe0bd4 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -1,10 +1,15 @@ /* For general debugging purposes */ #include "../perf.h" + #include #include #include +#include "color.h" +#include "event.h" +#include "debug.h" + int verbose = 0; int dump_trace = 0; @@ -35,3 +40,56 @@ int dump_printf(const char *fmt, ...) return ret; } + +static int dump_printf_color(const char *fmt, const char *color, ...) +{ + va_list args; + int ret = 0; + + if (dump_trace) { + va_start(args, color); + ret = color_vfprintf(stdout, color, fmt, args); + va_end(args); + } + + return ret; +} + + +void trace_event(event_t *event) +{ + unsigned char *raw_event = (void *)event; + const char *color = PERF_COLOR_BLUE; + int i, j; + + if (!dump_trace) + return; + + dump_printf("."); + dump_printf_color("\n. ... raw event: size %d bytes\n", color, + event->header.size); + + for (i = 0; i < event->header.size; i++) { + if ((i & 15) == 0) { + dump_printf("."); + dump_printf_color(" %04x: ", color, i); + } + + dump_printf_color(" %02x", color, raw_event[i]); + + if (((i & 15) == 15) || i == event->header.size-1) { + dump_printf_color(" ", color); + for (j = 0; j < 15-(i & 15); j++) + dump_printf_color(" ", color); + for (j = 0; j < (i & 15); j++) { + if (isprint(raw_event[i-15+j])) + dump_printf_color("%c", color, + raw_event[i-15+j]); + else + dump_printf_color(".", color); + } + dump_printf_color("\n", color); + } + } + dump_printf(".\n"); +} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index a683bd571f1c..437eea58ce40 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -5,3 +5,4 @@ extern int dump_trace; int eprintf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +void trace_event(event_t *event); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 3159d47ae1cc..fd3d9c8e90fc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -3,6 +3,8 @@ #include "string.h" #include "symbol.h" +#include "debug.h" + #include #include #include From 520509436417901f30106e021e037c75dfe5386c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 17 Aug 2009 16:18:05 +0200 Subject: [PATCH 16/60] perf tools: Add trace event debugfs IO handler Add util/trace-event-info.c which handles ftrace file IO from debugfs and provides general helpers to fetch/save ftrace events informations. This file is a rename of the trace-cmd.c file from the trace-cmd tools, written by Steven Rostedt and Josh Triplett, originated from the git tree: git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/trace-cmd.git This is a perf tools integration. For now, ftrace events information is saved in a separate file than the standard perf.data [fweisbec@gmail.com: various changes for perf tools integration] Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: "Luis Claudio R. Goncalves" Cc: Clark Williams Cc: Jon Masters Cc: Mathieu Desnoyers Cc: Christoph Hellwig Cc: Xiao Guangrong Cc: Zhaolei Cc: Li Zefan Cc: Lai Jiangshan Cc: Masami Hiramatsu Cc: Tom Zanussi Cc: "Frank Ch. Eigler" Cc: Roland McGrath Cc: Jason Baron Cc: Paul Mackerras Cc: Jiaying Zhang Cc: Anton Blanchard LKML-Reference: <1250518688-7207-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/trace-event-info.c | 491 +++++++++++++++++++++++++++++ tools/perf/util/trace-event.h | 238 ++++++++++++++ 2 files changed, 729 insertions(+) create mode 100644 tools/perf/util/trace-event-info.c create mode 100644 tools/perf/util/trace-event.h diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c new file mode 100644 index 000000000000..78adff189bba --- /dev/null +++ b/tools/perf/util/trace-event-info.c @@ -0,0 +1,491 @@ +/* + * Copyright (C) 2008,2009, Steven Rostedt + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace-event.h" + + +#define VERSION "0.5" + +#define _STR(x) #x +#define STR(x) _STR(x) +#define MAX_PATH 256 + +#define TRACE_CTRL "tracing_on" +#define TRACE "trace" +#define AVAILABLE "available_tracers" +#define CURRENT "current_tracer" +#define ITER_CTRL "trace_options" +#define MAX_LATENCY "tracing_max_latency" + +unsigned int page_size; + +static const char *output_file = "trace.info"; +static int output_fd; + +struct event_list { + struct event_list *next; + const char *event; +}; + +struct events { + struct events *sibling; + struct events *children; + struct events *next; + char *name; +}; + + + +static void die(const char *fmt, ...) +{ + va_list ap; + int ret = errno; + + if (errno) + perror("trace-cmd"); + else + ret = -1; + + va_start(ap, fmt); + fprintf(stderr, " "); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fprintf(stderr, "\n"); + exit(ret); +} + +void *malloc_or_die(unsigned int size) +{ + void *data; + + data = malloc(size); + if (!data) + die("malloc"); + return data; +} + +static const char *find_debugfs(void) +{ + static char debugfs[MAX_PATH+1]; + static int debugfs_found; + char type[100]; + FILE *fp; + + if (debugfs_found) + return debugfs; + + if ((fp = fopen("/proc/mounts","r")) == NULL) + die("Can't open /proc/mounts for read"); + + while (fscanf(fp, "%*s %" + STR(MAX_PATH) + "s %99s %*s %*d %*d\n", + debugfs, type) == 2) { + if (strcmp(type, "debugfs") == 0) + break; + } + fclose(fp); + + if (strcmp(type, "debugfs") != 0) + die("debugfs not mounted, please mount"); + + debugfs_found = 1; + + return debugfs; +} + +/* + * Finds the path to the debugfs/tracing + * Allocates the string and stores it. + */ +static const char *find_tracing_dir(void) +{ + static char *tracing; + static int tracing_found; + const char *debugfs; + + if (tracing_found) + return tracing; + + debugfs = find_debugfs(); + + tracing = malloc_or_die(strlen(debugfs) + 9); + + sprintf(tracing, "%s/tracing", debugfs); + + tracing_found = 1; + return tracing; +} + +static char *get_tracing_file(const char *name) +{ + const char *tracing; + char *file; + + tracing = find_tracing_dir(); + if (!tracing) + return NULL; + + file = malloc_or_die(strlen(tracing) + strlen(name) + 2); + + sprintf(file, "%s/%s", tracing, name); + return file; +} + +static void put_tracing_file(char *file) +{ + free(file); +} + +static ssize_t write_or_die(const void *buf, size_t len) +{ + int ret; + + ret = write(output_fd, buf, len); + if (ret < 0) + die("writing to '%s'", output_file); + + return ret; +} + +int bigendian(void) +{ + unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0}; + unsigned int *ptr; + + ptr = (unsigned int *)str; + return *ptr == 0x01020304; +} + +static unsigned long long copy_file_fd(int fd) +{ + unsigned long long size = 0; + char buf[BUFSIZ]; + int r; + + do { + r = read(fd, buf, BUFSIZ); + if (r > 0) { + size += r; + write_or_die(buf, r); + } + } while (r > 0); + + return size; +} + +static unsigned long long copy_file(const char *file) +{ + unsigned long long size = 0; + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) + die("Can't read '%s'", file); + size = copy_file_fd(fd); + close(fd); + + return size; +} + +static unsigned long get_size_fd(int fd) +{ + unsigned long long size = 0; + char buf[BUFSIZ]; + int r; + + do { + r = read(fd, buf, BUFSIZ); + if (r > 0) + size += r; + } while (r > 0); + + lseek(fd, 0, SEEK_SET); + + return size; +} + +static unsigned long get_size(const char *file) +{ + unsigned long long size = 0; + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) + die("Can't read '%s'", file); + size = get_size_fd(fd); + close(fd); + + return size; +} + +static void read_header_files(void) +{ + unsigned long long size, check_size; + char *path; + int fd; + + path = get_tracing_file("events/header_page"); + fd = open(path, O_RDONLY); + if (fd < 0) + die("can't read '%s'", path); + + /* unfortunately, you can not stat debugfs files for size */ + size = get_size_fd(fd); + + write_or_die("header_page", 12); + write_or_die(&size, 8); + check_size = copy_file_fd(fd); + if (size != check_size) + die("wrong size for '%s' size=%lld read=%lld", + path, size, check_size); + put_tracing_file(path); + + path = get_tracing_file("events/header_event"); + fd = open(path, O_RDONLY); + if (fd < 0) + die("can't read '%s'", path); + + size = get_size_fd(fd); + + write_or_die("header_event", 13); + write_or_die(&size, 8); + check_size = copy_file_fd(fd); + if (size != check_size) + die("wrong size for '%s'", path); + put_tracing_file(path); +} + +static void copy_event_system(const char *sys) +{ + unsigned long long size, check_size; + struct dirent *dent; + struct stat st; + char *format; + DIR *dir; + int count = 0; + int ret; + + dir = opendir(sys); + if (!dir) + die("can't read directory '%s'", sys); + + while ((dent = readdir(dir))) { + if (strcmp(dent->d_name, ".") == 0 || + strcmp(dent->d_name, "..") == 0) + continue; + format = malloc_or_die(strlen(sys) + strlen(dent->d_name) + 10); + sprintf(format, "%s/%s/format", sys, dent->d_name); + ret = stat(format, &st); + free(format); + if (ret < 0) + continue; + count++; + } + + write_or_die(&count, 4); + + rewinddir(dir); + while ((dent = readdir(dir))) { + if (strcmp(dent->d_name, ".") == 0 || + strcmp(dent->d_name, "..") == 0) + continue; + format = malloc_or_die(strlen(sys) + strlen(dent->d_name) + 10); + sprintf(format, "%s/%s/format", sys, dent->d_name); + ret = stat(format, &st); + + if (ret >= 0) { + /* unfortunately, you can not stat debugfs files for size */ + size = get_size(format); + write_or_die(&size, 8); + check_size = copy_file(format); + if (size != check_size) + die("error in size of file '%s'", format); + } + + free(format); + } +} + +static void read_ftrace_files(void) +{ + char *path; + + path = get_tracing_file("events/ftrace"); + + copy_event_system(path); + + put_tracing_file(path); +} + +static void read_event_files(void) +{ + struct dirent *dent; + struct stat st; + char *path; + char *sys; + DIR *dir; + int count = 0; + int ret; + + path = get_tracing_file("events"); + + dir = opendir(path); + if (!dir) + die("can't read directory '%s'", path); + + while ((dent = readdir(dir))) { + if (strcmp(dent->d_name, ".") == 0 || + strcmp(dent->d_name, "..") == 0 || + strcmp(dent->d_name, "ftrace") == 0) + continue; + sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2); + sprintf(sys, "%s/%s", path, dent->d_name); + ret = stat(sys, &st); + free(sys); + if (ret < 0) + continue; + if (S_ISDIR(st.st_mode)) + count++; + } + + write_or_die(&count, 4); + + rewinddir(dir); + while ((dent = readdir(dir))) { + if (strcmp(dent->d_name, ".") == 0 || + strcmp(dent->d_name, "..") == 0 || + strcmp(dent->d_name, "ftrace") == 0) + continue; + sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2); + sprintf(sys, "%s/%s", path, dent->d_name); + ret = stat(sys, &st); + if (ret >= 0) { + if (S_ISDIR(st.st_mode)) { + write_or_die(dent->d_name, strlen(dent->d_name) + 1); + copy_event_system(sys); + } + } + free(sys); + } + + put_tracing_file(path); +} + +static void read_proc_kallsyms(void) +{ + unsigned int size, check_size; + const char *path = "/proc/kallsyms"; + struct stat st; + int ret; + + ret = stat(path, &st); + if (ret < 0) { + /* not found */ + size = 0; + write_or_die(&size, 4); + return; + } + size = get_size(path); + write_or_die(&size, 4); + check_size = copy_file(path); + if (size != check_size) + die("error in size of file '%s'", path); + +} + +static void read_ftrace_printk(void) +{ + unsigned int size, check_size; + const char *path; + struct stat st; + int ret; + + path = get_tracing_file("printk_formats"); + ret = stat(path, &st); + if (ret < 0) { + /* not found */ + size = 0; + write_or_die(&size, 4); + return; + } + size = get_size(path); + write_or_die(&size, 4); + check_size = copy_file(path); + if (size != check_size) + die("error in size of file '%s'", path); + +} + +void read_tracing_data(void) +{ + char buf[BUFSIZ]; + + output_fd = open(output_file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); + if (output_fd < 0) + die("creating file '%s'", output_file); + + buf[0] = 23; + buf[1] = 8; + buf[2] = 68; + memcpy(buf + 3, "tracing", 7); + + write_or_die(buf, 10); + + write_or_die(VERSION, strlen(VERSION) + 1); + + /* save endian */ + if (bigendian()) + buf[0] = 1; + else + buf[0] = 0; + + write_or_die(buf, 1); + + /* save size of long */ + buf[0] = sizeof(long); + write_or_die(buf, 1); + + /* save page_size */ + page_size = getpagesize(); + write_or_die(&page_size, 4); + + read_header_files(); + read_ftrace_files(); + read_event_files(); + read_proc_kallsyms(); + read_ftrace_printk(); +} diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h new file mode 100644 index 000000000000..3ddb8947be8a --- /dev/null +++ b/tools/perf/util/trace-event.h @@ -0,0 +1,238 @@ +#ifndef _PARSE_EVENTS_H +#define _PARSE_EVENTS_H + + +#define __unused __attribute__((unused)) + + +#ifndef PAGE_MASK +#define PAGE_MASK (page_size - 1) +#endif + +enum { + RINGBUF_TYPE_PADDING = 29, + RINGBUF_TYPE_TIME_EXTEND = 30, + RINGBUF_TYPE_TIME_STAMP = 31, +}; + +#ifndef TS_SHIFT +#define TS_SHIFT 27 +#endif + +#define NSECS_PER_SEC 1000000000ULL +#define NSECS_PER_USEC 1000ULL + +enum format_flags { + FIELD_IS_ARRAY = 1, + FIELD_IS_POINTER = 2, +}; + +struct format_field { + struct format_field *next; + char *type; + char *name; + int offset; + int size; + unsigned long flags; +}; + +struct format { + int nr_common; + int nr_fields; + struct format_field *common_fields; + struct format_field *fields; +}; + +struct print_arg_atom { + char *atom; +}; + +struct print_arg_string { + char *string; +}; + +struct print_arg_field { + char *name; + struct format_field *field; +}; + +struct print_flag_sym { + struct print_flag_sym *next; + char *value; + char *str; +}; + +struct print_arg_typecast { + char *type; + struct print_arg *item; +}; + +struct print_arg_flags { + struct print_arg *field; + char *delim; + struct print_flag_sym *flags; +}; + +struct print_arg_symbol { + struct print_arg *field; + struct print_flag_sym *symbols; +}; + +struct print_arg; + +struct print_arg_op { + char *op; + int prio; + struct print_arg *left; + struct print_arg *right; +}; + +struct print_arg_func { + char *name; + struct print_arg *args; +}; + +enum print_arg_type { + PRINT_NULL, + PRINT_ATOM, + PRINT_FIELD, + PRINT_FLAGS, + PRINT_SYMBOL, + PRINT_TYPE, + PRINT_STRING, + PRINT_OP, +}; + +struct print_arg { + struct print_arg *next; + enum print_arg_type type; + union { + struct print_arg_atom atom; + struct print_arg_field field; + struct print_arg_typecast typecast; + struct print_arg_flags flags; + struct print_arg_symbol symbol; + struct print_arg_func func; + struct print_arg_string string; + struct print_arg_op op; + }; +}; + +struct print_fmt { + char *format; + struct print_arg *args; +}; + +struct event { + struct event *next; + char *name; + int id; + int flags; + struct format format; + struct print_fmt print_fmt; +}; + +enum { + EVENT_FL_ISFTRACE = 1, + EVENT_FL_ISPRINT = 2, + EVENT_FL_ISBPRINT = 4, + EVENT_FL_ISFUNC = 8, + EVENT_FL_ISFUNCENT = 16, + EVENT_FL_ISFUNCRET = 32, +}; + +struct record { + unsigned long long ts; + int size; + void *data; +}; + +struct record *trace_peek_data(int cpu); +struct record *trace_read_data(int cpu); + +void parse_set_info(int nr_cpus, int long_sz); + +void trace_report(void); + +void *malloc_or_die(unsigned int size); + +void parse_cmdlines(char *file, int size); +void parse_proc_kallsyms(char *file, unsigned int size); +void parse_ftrace_printk(char *file, unsigned int size); + +void print_funcs(void); +void print_printk(void); + +int parse_ftrace_file(char *buf, unsigned long size); +int parse_event_file(char *buf, unsigned long size, char *system); +void print_event(int cpu, void *data, int size, unsigned long long nsecs, + char *comm); + +extern int file_bigendian; +extern int host_bigendian; + +int bigendian(void); + +static inline unsigned short __data2host2(unsigned short data) +{ + unsigned short swap; + + if (host_bigendian == file_bigendian) + return data; + + swap = ((data & 0xffULL) << 8) | + ((data & (0xffULL << 8)) >> 8); + + return swap; +} + +static inline unsigned int __data2host4(unsigned int data) +{ + unsigned int swap; + + if (host_bigendian == file_bigendian) + return data; + + swap = ((data & 0xffULL) << 24) | + ((data & (0xffULL << 8)) << 8) | + ((data & (0xffULL << 16)) >> 8) | + ((data & (0xffULL << 24)) >> 24); + + return swap; +} + +static inline unsigned long long __data2host8(unsigned long long data) +{ + unsigned long long swap; + + if (host_bigendian == file_bigendian) + return data; + + swap = ((data & 0xffULL) << 56) | + ((data & (0xffULL << 8)) << 40) | + ((data & (0xffULL << 16)) << 24) | + ((data & (0xffULL << 24)) << 8) | + ((data & (0xffULL << 32)) >> 8) | + ((data & (0xffULL << 40)) >> 24) | + ((data & (0xffULL << 48)) >> 40) | + ((data & (0xffULL << 56)) >> 56); + + return swap; +} + +#define data2host2(ptr) __data2host2(*(unsigned short *)ptr) +#define data2host4(ptr) __data2host4(*(unsigned int *)ptr) +#define data2host8(ptr) __data2host8(*(unsigned long long *)ptr) + +extern int header_page_ts_offset; +extern int header_page_ts_size; +extern int header_page_size_offset; +extern int header_page_size_size; +extern int header_page_data_offset; +extern int header_page_data_size; + +int parse_header_page(char *buf, unsigned long size); + +void read_tracing_data(void); + +#endif /* _PARSE_EVENTS_H */ From 538bafb5cc92a86d97b427421231f185574fe3db Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 17 Aug 2009 16:18:06 +0200 Subject: [PATCH 17/60] perf tools: Add trace event debugfs stream reader Add util/trace-event-read.c which handles trace events informations reading. This file is a rename of the trace-read.c file from the trace-cmd tools, written by Steven Rostedt and Josh Triplett, originated from the git tree: git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/trace-cmd.git This is its perf tools integration. [ fweisbec@gmail.com: various changes for perf tools integration. ] Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: "Luis Claudio R. Goncalves" Cc: Clark Williams Cc: Jon Masters Cc: Mathieu Desnoyers Cc: Christoph Hellwig Cc: Xiao Guangrong Cc: Zhaolei Cc: Li Zefan Cc: Lai Jiangshan Cc: Masami Hiramatsu Cc: Tom Zanussi Cc: "Frank Ch. Eigler" Cc: Roland McGrath Cc: Jason Baron Cc: Paul Mackerras Cc: Jiaying Zhang Cc: Anton Blanchard LKML-Reference: <1250518688-7207-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/trace-event-read.c | 508 +++++++++++++++++++++++++++++ 1 file changed, 508 insertions(+) create mode 100644 tools/perf/util/trace-event-read.c diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c new file mode 100644 index 000000000000..1dac301ae54f --- /dev/null +++ b/tools/perf/util/trace-event-read.c @@ -0,0 +1,508 @@ +/* + * Copyright (C) 2009, Steven Rostedt + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#define _LARGEFILE64_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "trace-event.h" + +static int input_fd; + +static int read_page; + +int file_bigendian; +int host_bigendian; +static int long_size; + +static unsigned long page_size; + +static int read_or_die(void *data, int size) +{ + int r; + + r = read(input_fd, data, size); + if (r != size) + die("reading input file (size expected=%d received=%d)", + size, r); + return r; +} + +static unsigned int read4(void) +{ + unsigned int data; + + read_or_die(&data, 4); + return __data2host4(data); +} + +static unsigned long long read8(void) +{ + unsigned long long data; + + read_or_die(&data, 8); + return __data2host8(data); +} + +static char *read_string(void) +{ + char buf[BUFSIZ]; + char *str = NULL; + int size = 0; + int i; + int r; + + for (;;) { + r = read(input_fd, buf, BUFSIZ); + if (r < 0) + die("reading input file"); + + if (!r) + die("no data"); + + for (i = 0; i < r; i++) { + if (!buf[i]) + break; + } + if (i < r) + break; + + if (str) { + size += BUFSIZ; + str = realloc(str, size); + if (!str) + die("malloc of size %d", size); + memcpy(str + (size - BUFSIZ), buf, BUFSIZ); + } else { + size = BUFSIZ; + str = malloc_or_die(size); + memcpy(str, buf, size); + } + } + + /* move the file descriptor to the end of the string */ + r = lseek(input_fd, -(r - (i+1)), SEEK_CUR); + if (r < 0) + die("lseek"); + + if (str) { + size += i; + str = realloc(str, size); + if (!str) + die("malloc of size %d", size); + memcpy(str + (size - i), buf, i); + } else { + size = i; + str = malloc_or_die(i); + memcpy(str, buf, i); + } + + return str; +} + +static void read_proc_kallsyms(void) +{ + unsigned int size; + char *buf; + + size = read4(); + if (!size) + return; + + buf = malloc_or_die(size); + read_or_die(buf, size); + + parse_proc_kallsyms(buf, size); + + free(buf); +} + +static void read_ftrace_printk(void) +{ + unsigned int size; + char *buf; + + size = read4(); + if (!size) + return; + + buf = malloc_or_die(size); + read_or_die(buf, size); + + parse_ftrace_printk(buf, size); + + free(buf); +} + +static void read_header_files(void) +{ + unsigned long long size; + char *header_page; + char *header_event; + char buf[BUFSIZ]; + + read_or_die(buf, 12); + + if (memcmp(buf, "header_page", 12) != 0) + die("did not read header page"); + + size = read8(); + header_page = malloc_or_die(size); + read_or_die(header_page, size); + parse_header_page(header_page, size); + free(header_page); + + /* + * The size field in the page is of type long, + * use that instead, since it represents the kernel. + */ + long_size = header_page_size_size; + + read_or_die(buf, 13); + if (memcmp(buf, "header_event", 13) != 0) + die("did not read header event"); + + size = read8(); + header_event = malloc_or_die(size); + read_or_die(header_event, size); + free(header_event); +} + +static void read_ftrace_file(unsigned long long size) +{ + char *buf; + + buf = malloc_or_die(size); + read_or_die(buf, size); + parse_ftrace_file(buf, size); + free(buf); +} + +static void read_event_file(char *sys, unsigned long long size) +{ + char *buf; + + buf = malloc_or_die(size); + read_or_die(buf, size); + parse_event_file(buf, size, sys); + free(buf); +} + +static void read_ftrace_files(void) +{ + unsigned long long size; + int count; + int i; + + count = read4(); + + for (i = 0; i < count; i++) { + size = read8(); + read_ftrace_file(size); + } +} + +static void read_event_files(void) +{ + unsigned long long size; + char *sys; + int systems; + int count; + int i,x; + + systems = read4(); + + for (i = 0; i < systems; i++) { + sys = read_string(); + + count = read4(); + for (x=0; x < count; x++) { + size = read8(); + read_event_file(sys, size); + } + } +} + +struct cpu_data { + unsigned long long offset; + unsigned long long size; + unsigned long long timestamp; + struct record *next; + char *page; + int cpu; + int index; + int page_size; +}; + +static struct cpu_data *cpu_data; + +static void update_cpu_data_index(int cpu) +{ + cpu_data[cpu].offset += page_size; + cpu_data[cpu].size -= page_size; + cpu_data[cpu].index = 0; +} + +static void get_next_page(int cpu) +{ + off64_t save_seek; + off64_t ret; + + if (!cpu_data[cpu].page) + return; + + if (read_page) { + if (cpu_data[cpu].size <= page_size) { + free(cpu_data[cpu].page); + cpu_data[cpu].page = NULL; + return; + } + + update_cpu_data_index(cpu); + + /* other parts of the code may expect the pointer to not move */ + save_seek = lseek64(input_fd, 0, SEEK_CUR); + + ret = lseek64(input_fd, cpu_data[cpu].offset, SEEK_SET); + if (ret < 0) + die("failed to lseek"); + ret = read(input_fd, cpu_data[cpu].page, page_size); + if (ret < 0) + die("failed to read page"); + + /* reset the file pointer back */ + lseek64(input_fd, save_seek, SEEK_SET); + + return; + } + + munmap(cpu_data[cpu].page, page_size); + cpu_data[cpu].page = NULL; + + if (cpu_data[cpu].size <= page_size) + return; + + update_cpu_data_index(cpu); + + cpu_data[cpu].page = mmap(NULL, page_size, PROT_READ, MAP_PRIVATE, + input_fd, cpu_data[cpu].offset); + if (cpu_data[cpu].page == MAP_FAILED) + die("failed to mmap cpu %d at offset 0x%llx", + cpu, cpu_data[cpu].offset); +} + +static unsigned int type_len4host(unsigned int type_len_ts) +{ + if (file_bigendian) + return (type_len_ts >> 27) & ((1 << 5) - 1); + else + return type_len_ts & ((1 << 5) - 1); +} + +static unsigned int ts4host(unsigned int type_len_ts) +{ + if (file_bigendian) + return type_len_ts & ((1 << 27) - 1); + else + return type_len_ts >> 5; +} + +static int calc_index(void *ptr, int cpu) +{ + return (unsigned long)ptr - (unsigned long)cpu_data[cpu].page; +} + +struct record *trace_peek_data(int cpu) +{ + struct record *data; + void *page = cpu_data[cpu].page; + int idx = cpu_data[cpu].index; + void *ptr = page + idx; + unsigned long long extend; + unsigned int type_len_ts; + unsigned int type_len; + unsigned int delta; + unsigned int length = 0; + + if (cpu_data[cpu].next) + return cpu_data[cpu].next; + + if (!page) + return NULL; + + if (!idx) { + /* FIXME: handle header page */ + if (header_page_ts_size != 8) + die("expected a long long type for timestamp"); + cpu_data[cpu].timestamp = data2host8(ptr); + ptr += 8; + switch (header_page_size_size) { + case 4: + cpu_data[cpu].page_size = data2host4(ptr); + ptr += 4; + break; + case 8: + cpu_data[cpu].page_size = data2host8(ptr); + ptr += 8; + break; + default: + die("bad long size"); + } + ptr = cpu_data[cpu].page + header_page_data_offset; + } + +read_again: + idx = calc_index(ptr, cpu); + + if (idx >= cpu_data[cpu].page_size) { + get_next_page(cpu); + return trace_peek_data(cpu); + } + + type_len_ts = data2host4(ptr); + ptr += 4; + + type_len = type_len4host(type_len_ts); + delta = ts4host(type_len_ts); + + switch (type_len) { + case RINGBUF_TYPE_PADDING: + if (!delta) + die("error, hit unexpected end of page"); + length = data2host4(ptr); + ptr += 4; + length *= 4; + ptr += length; + goto read_again; + + case RINGBUF_TYPE_TIME_EXTEND: + extend = data2host4(ptr); + ptr += 4; + extend <<= TS_SHIFT; + extend += delta; + cpu_data[cpu].timestamp += extend; + goto read_again; + + case RINGBUF_TYPE_TIME_STAMP: + ptr += 12; + break; + case 0: + length = data2host4(ptr); + ptr += 4; + die("here! length=%d", length); + break; + default: + length = type_len * 4; + break; + } + + cpu_data[cpu].timestamp += delta; + + data = malloc_or_die(sizeof(*data)); + memset(data, 0, sizeof(*data)); + + data->ts = cpu_data[cpu].timestamp; + data->size = length; + data->data = ptr; + ptr += length; + + cpu_data[cpu].index = calc_index(ptr, cpu); + cpu_data[cpu].next = data; + + return data; +} + +struct record *trace_read_data(int cpu) +{ + struct record *data; + + data = trace_peek_data(cpu); + cpu_data[cpu].next = NULL; + + return data; +} + +void trace_report (void) +{ + const char *input_file = "trace.info"; + char buf[BUFSIZ]; + char test[] = { 23, 8, 68 }; + char *version; + int show_funcs = 0; + int show_printk = 0; + + input_fd = open(input_file, O_RDONLY); + if (input_fd < 0) + die("opening '%s'\n", input_file); + + read_or_die(buf, 3); + if (memcmp(buf, test, 3) != 0) + die("not an trace data file"); + + read_or_die(buf, 7); + if (memcmp(buf, "tracing", 7) != 0) + die("not a trace file (missing tracing)"); + + version = read_string(); + printf("version = %s\n", version); + free(version); + + read_or_die(buf, 1); + file_bigendian = buf[0]; + host_bigendian = bigendian(); + + read_or_die(buf, 1); + long_size = buf[0]; + + page_size = read4(); + + read_header_files(); + + read_ftrace_files(); + read_event_files(); + read_proc_kallsyms(); + read_ftrace_printk(); + + if (show_funcs) { + print_funcs(); + return; + } + if (show_printk) { + print_printk(); + return; + } + + return; +} From ea4010d1363699770a9894493bafe556a59a144c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 17 Aug 2009 16:18:07 +0200 Subject: [PATCH 18/60] perf tools: Add trace event information parser Add util/trace-event-parse.c which provides the handlers to parse the ftrace events info from the stream and handles the ftrace perf samples event printing. This file is a rename of the parse-events.c file from the trace-cmd tools, written by Steven Rostedt and Josh Triplett, originated from the git tree: git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/trace-cmd.git This is a perf tools integration. [ fweisbec@gmail.com: various changes for perf tools integration. ] Signed-off-by: Steven Rostedt Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: "Luis Claudio R. Goncalves" Cc: Clark Williams Cc: Jon Masters Cc: Mathieu Desnoyers Cc: Christoph Hellwig Cc: Xiao Guangrong Cc: Zhaolei Cc: Li Zefan Cc: Lai Jiangshan Cc: Masami Hiramatsu Cc: Tom Zanussi Cc: "Frank Ch. Eigler" Cc: Roland McGrath Cc: Jason Baron Cc: Paul Mackerras Cc: Jiaying Zhang Cc: Anton Blanchard LKML-Reference: <1250518688-7207-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/trace-event-parse.c | 2905 +++++++++++++++++++++++++++ 1 file changed, 2905 insertions(+) create mode 100644 tools/perf/util/trace-event-parse.c diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c new file mode 100644 index 000000000000..ead6a9ad3599 --- /dev/null +++ b/tools/perf/util/trace-event-parse.c @@ -0,0 +1,2905 @@ +/* + * Copyright (C) 2009, Steven Rostedt + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The parts for function graph printing was taken and modified from the + * Linux Kernel that were written by Frederic Weisbecker. + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#undef _GNU_SOURCE +#include "util.h" +#include "trace-event.h" + +int header_page_ts_offset; +int header_page_ts_size; +int header_page_size_offset; +int header_page_size_size; +int header_page_data_offset; +int header_page_data_size; + +static char *input_buf; +static unsigned long long input_buf_ptr; +static unsigned long long input_buf_siz; + +static int cpus; +static int long_size; + +static void init_input_buf(char *buf, unsigned long long size) +{ + input_buf = buf; + input_buf_siz = size; + input_buf_ptr = 0; +} + +struct cmdline { + char *comm; + int pid; +}; + +static struct cmdline *cmdlines; +static int cmdline_count; + +static int cmdline_cmp(const void *a, const void *b) +{ + const struct cmdline *ca = a; + const struct cmdline *cb = b; + + if (ca->pid < cb->pid) + return -1; + if (ca->pid > cb->pid) + return 1; + + return 0; +} + +void parse_cmdlines(char *file, int size __unused) +{ + struct cmdline_list { + struct cmdline_list *next; + char *comm; + int pid; + } *list = NULL, *item; + char *line; + char *next = NULL; + int i; + + line = strtok_r(file, "\n", &next); + while (line) { + item = malloc_or_die(sizeof(*item)); + sscanf(line, "%d %as", &item->pid, + (float *)&item->comm); /* workaround gcc warning */ + item->next = list; + list = item; + line = strtok_r(NULL, "\n", &next); + cmdline_count++; + } + + cmdlines = malloc_or_die(sizeof(*cmdlines) * cmdline_count); + + i = 0; + while (list) { + cmdlines[i].pid = list->pid; + cmdlines[i].comm = list->comm; + i++; + item = list; + list = list->next; + free(item); + } + + qsort(cmdlines, cmdline_count, sizeof(*cmdlines), cmdline_cmp); +} + +static struct func_map { + unsigned long long addr; + char *func; + char *mod; +} *func_list; +static unsigned int func_count; + +static int func_cmp(const void *a, const void *b) +{ + const struct func_map *fa = a; + const struct func_map *fb = b; + + if (fa->addr < fb->addr) + return -1; + if (fa->addr > fb->addr) + return 1; + + return 0; +} + +void parse_proc_kallsyms(char *file, unsigned int size __unused) +{ + struct func_list { + struct func_list *next; + unsigned long long addr; + char *func; + char *mod; + } *list = NULL, *item; + char *line; + char *next = NULL; + char *addr_str; + char ch; + int ret; + int i; + + line = strtok_r(file, "\n", &next); + while (line) { + item = malloc_or_die(sizeof(*item)); + item->mod = NULL; + ret = sscanf(line, "%as %c %as\t[%as", + (float *)&addr_str, /* workaround gcc warning */ + &ch, + (float *)&item->func, + (float *)&item->mod); + item->addr = strtoull(addr_str, NULL, 16); + free(addr_str); + + /* truncate the extra ']' */ + if (item->mod) + item->mod[strlen(item->mod) - 1] = 0; + + + item->next = list; + list = item; + line = strtok_r(NULL, "\n", &next); + func_count++; + } + + func_list = malloc_or_die(sizeof(*func_list) * func_count + 1); + + i = 0; + while (list) { + func_list[i].func = list->func; + func_list[i].addr = list->addr; + func_list[i].mod = list->mod; + i++; + item = list; + list = list->next; + free(item); + } + + qsort(func_list, func_count, sizeof(*func_list), func_cmp); + + /* + * Add a special record at the end. + */ + func_list[func_count].func = NULL; + func_list[func_count].addr = 0; + func_list[func_count].mod = NULL; +} + +/* + * We are searching for a record in between, not an exact + * match. + */ +static int func_bcmp(const void *a, const void *b) +{ + const struct func_map *fa = a; + const struct func_map *fb = b; + + if ((fa->addr == fb->addr) || + + (fa->addr > fb->addr && + fa->addr < (fb+1)->addr)) + return 0; + + if (fa->addr < fb->addr) + return -1; + + return 1; +} + +static struct func_map *find_func(unsigned long long addr) +{ + struct func_map *func; + struct func_map key; + + key.addr = addr; + + func = bsearch(&key, func_list, func_count, sizeof(*func_list), + func_bcmp); + + return func; +} + +void print_funcs(void) +{ + int i; + + for (i = 0; i < (int)func_count; i++) { + printf("%016llx %s", + func_list[i].addr, + func_list[i].func); + if (func_list[i].mod) + printf(" [%s]\n", func_list[i].mod); + else + printf("\n"); + } +} + +static struct printk_map { + unsigned long long addr; + char *printk; +} *printk_list; +static unsigned int printk_count; + +static int printk_cmp(const void *a, const void *b) +{ + const struct func_map *fa = a; + const struct func_map *fb = b; + + if (fa->addr < fb->addr) + return -1; + if (fa->addr > fb->addr) + return 1; + + return 0; +} + +static struct printk_map *find_printk(unsigned long long addr) +{ + struct printk_map *printk; + struct printk_map key; + + key.addr = addr; + + printk = bsearch(&key, printk_list, printk_count, sizeof(*printk_list), + printk_cmp); + + return printk; +} + +void parse_ftrace_printk(char *file, unsigned int size __unused) +{ + struct printk_list { + struct printk_list *next; + unsigned long long addr; + char *printk; + } *list = NULL, *item; + char *line; + char *next = NULL; + char *addr_str; + int ret; + int i; + + line = strtok_r(file, "\n", &next); + while (line) { + item = malloc_or_die(sizeof(*item)); + ret = sscanf(line, "%as : %as", + (float *)&addr_str, /* workaround gcc warning */ + (float *)&item->printk); + item->addr = strtoull(addr_str, NULL, 16); + free(addr_str); + + item->next = list; + list = item; + line = strtok_r(NULL, "\n", &next); + printk_count++; + } + + printk_list = malloc_or_die(sizeof(*printk_list) * printk_count + 1); + + i = 0; + while (list) { + printk_list[i].printk = list->printk; + printk_list[i].addr = list->addr; + i++; + item = list; + list = list->next; + free(item); + } + + qsort(printk_list, printk_count, sizeof(*printk_list), printk_cmp); +} + +void print_printk(void) +{ + int i; + + for (i = 0; i < (int)printk_count; i++) { + printf("%016llx %s\n", + printk_list[i].addr, + printk_list[i].printk); + } +} + +static struct event *alloc_event(void) +{ + struct event *event; + + event = malloc_or_die(sizeof(*event)); + memset(event, 0, sizeof(*event)); + + return event; +} + +enum event_type { + EVENT_ERROR, + EVENT_NONE, + EVENT_SPACE, + EVENT_NEWLINE, + EVENT_OP, + EVENT_DELIM, + EVENT_ITEM, + EVENT_DQUOTE, + EVENT_SQUOTE, +}; + +static struct event *event_list; + +static void add_event(struct event *event) +{ + event->next = event_list; + event_list = event; +} + +static int event_item_type(enum event_type type) +{ + switch (type) { + case EVENT_ITEM ... EVENT_SQUOTE: + return 1; + case EVENT_ERROR ... EVENT_DELIM: + default: + return 0; + } +} + +static void free_arg(struct print_arg *arg) +{ + if (!arg) + return; + + switch (arg->type) { + case PRINT_ATOM: + if (arg->atom.atom) + free(arg->atom.atom); + break; + case PRINT_NULL: + case PRINT_FIELD ... PRINT_OP: + default: + /* todo */ + break; + } + + free(arg); +} + +static enum event_type get_type(int ch) +{ + if (ch == '\n') + return EVENT_NEWLINE; + if (isspace(ch)) + return EVENT_SPACE; + if (isalnum(ch) || ch == '_') + return EVENT_ITEM; + if (ch == '\'') + return EVENT_SQUOTE; + if (ch == '"') + return EVENT_DQUOTE; + if (!isprint(ch)) + return EVENT_NONE; + if (ch == '(' || ch == ')' || ch == ',') + return EVENT_DELIM; + + return EVENT_OP; +} + +static int __read_char(void) +{ + if (input_buf_ptr >= input_buf_siz) + return -1; + + return input_buf[input_buf_ptr++]; +} + +static int __peek_char(void) +{ + if (input_buf_ptr >= input_buf_siz) + return -1; + + return input_buf[input_buf_ptr]; +} + +static enum event_type __read_token(char **tok) +{ + char buf[BUFSIZ]; + int ch, last_ch, quote_ch, next_ch; + int i = 0; + int tok_size = 0; + enum event_type type; + + *tok = NULL; + + + ch = __read_char(); + if (ch < 0) + return EVENT_NONE; + + type = get_type(ch); + if (type == EVENT_NONE) + return type; + + buf[i++] = ch; + + switch (type) { + case EVENT_NEWLINE: + case EVENT_DELIM: + *tok = malloc_or_die(2); + (*tok)[0] = ch; + (*tok)[1] = 0; + return type; + + case EVENT_OP: + switch (ch) { + case '-': + next_ch = __peek_char(); + if (next_ch == '>') { + buf[i++] = __read_char(); + break; + } + /* fall through */ + case '+': + case '|': + case '&': + case '>': + case '<': + last_ch = ch; + ch = __peek_char(); + if (ch != last_ch) + goto test_equal; + buf[i++] = __read_char(); + switch (last_ch) { + case '>': + case '<': + goto test_equal; + default: + break; + } + break; + case '!': + case '=': + goto test_equal; + default: /* what should we do instead? */ + break; + } + buf[i] = 0; + *tok = strdup(buf); + return type; + + test_equal: + ch = __peek_char(); + if (ch == '=') + buf[i++] = __read_char(); + break; + + case EVENT_DQUOTE: + case EVENT_SQUOTE: + /* don't keep quotes */ + i--; + quote_ch = ch; + last_ch = 0; + do { + if (i == (BUFSIZ - 1)) { + buf[i] = 0; + if (*tok) { + *tok = realloc(*tok, tok_size + BUFSIZ); + if (!*tok) + return EVENT_NONE; + strcat(*tok, buf); + } else + *tok = strdup(buf); + + if (!*tok) + return EVENT_NONE; + tok_size += BUFSIZ; + i = 0; + } + last_ch = ch; + ch = __read_char(); + buf[i++] = ch; + } while (ch != quote_ch && last_ch != '\\'); + /* remove the last quote */ + i--; + goto out; + + case EVENT_ERROR ... EVENT_SPACE: + case EVENT_ITEM: + default: + break; + } + + while (get_type(__peek_char()) == type) { + if (i == (BUFSIZ - 1)) { + buf[i] = 0; + if (*tok) { + *tok = realloc(*tok, tok_size + BUFSIZ); + if (!*tok) + return EVENT_NONE; + strcat(*tok, buf); + } else + *tok = strdup(buf); + + if (!*tok) + return EVENT_NONE; + tok_size += BUFSIZ; + i = 0; + } + ch = __read_char(); + buf[i++] = ch; + } + + out: + buf[i] = 0; + if (*tok) { + *tok = realloc(*tok, tok_size + i); + if (!*tok) + return EVENT_NONE; + strcat(*tok, buf); + } else + *tok = strdup(buf); + if (!*tok) + return EVENT_NONE; + + return type; +} + +static void free_token(char *tok) +{ + if (tok) + free(tok); +} + +static enum event_type read_token(char **tok) +{ + enum event_type type; + + for (;;) { + type = __read_token(tok); + if (type != EVENT_SPACE) + return type; + + free_token(*tok); + } + + /* not reached */ + return EVENT_NONE; +} + +/* no newline */ +static enum event_type read_token_item(char **tok) +{ + enum event_type type; + + for (;;) { + type = __read_token(tok); + if (type != EVENT_SPACE && type != EVENT_NEWLINE) + return type; + + free_token(*tok); + } + + /* not reached */ + return EVENT_NONE; +} + +static int test_type(enum event_type type, enum event_type expect) +{ + if (type != expect) { + die("Error: expected type %d but read %d", + expect, type); + return -1; + } + return 0; +} + +static int test_type_token(enum event_type type, char *token, + enum event_type expect, char *expect_tok) +{ + if (type != expect) { + die("Error: expected type %d but read %d", + expect, type); + return -1; + } + + if (strcmp(token, expect_tok) != 0) { + die("Error: expected '%s' but read '%s'", + expect_tok, token); + return -1; + } + return 0; +} + +static int __read_expect_type(enum event_type expect, char **tok, int newline_ok) +{ + enum event_type type; + + if (newline_ok) + type = read_token(tok); + else + type = read_token_item(tok); + return test_type(type, expect); +} + +static int read_expect_type(enum event_type expect, char **tok) +{ + return __read_expect_type(expect, tok, 1); +} + +static int __read_expected(enum event_type expect, char *str, int newline_ok) +{ + enum event_type type; + char *token; + int ret; + + if (newline_ok) + type = read_token(&token); + else + type = read_token_item(&token); + + ret = test_type_token(type, token, expect, str); + + free_token(token); + + return 0; +} + +static int read_expected(enum event_type expect, char *str) +{ + return __read_expected(expect, str, 1); +} + +static int read_expected_item(enum event_type expect, char *str) +{ + return __read_expected(expect, str, 0); +} + +static char *event_read_name(void) +{ + char *token; + + if (read_expected(EVENT_ITEM, (char *)"name") < 0) + return NULL; + + if (read_expected(EVENT_OP, (char *)":") < 0) + return NULL; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + + return token; + + fail: + free_token(token); + return NULL; +} + +static int event_read_id(void) +{ + char *token; + int id; + + if (read_expected_item(EVENT_ITEM, (char *)"ID") < 0) + return -1; + + if (read_expected(EVENT_OP, (char *)":") < 0) + return -1; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + + id = strtoul(token, NULL, 0); + free_token(token); + return id; + + fail: + free_token(token); + return -1; +} + +static int event_read_fields(struct event *event, struct format_field **fields) +{ + struct format_field *field = NULL; + enum event_type type; + char *token; + char *last_token; + int count = 0; + + do { + type = read_token(&token); + if (type == EVENT_NEWLINE) { + free_token(token); + return count; + } + + count++; + + if (test_type_token(type, token, EVENT_ITEM, (char *)"field")) + goto fail; + free_token(token); + + type = read_token(&token); + /* + * The ftrace fields may still use the "special" name. + * Just ignore it. + */ + if (event->flags & EVENT_FL_ISFTRACE && + type == EVENT_ITEM && strcmp(token, "special") == 0) { + free_token(token); + type = read_token(&token); + } + + if (test_type_token(type, token, EVENT_OP, (char *)":") < 0) + return -1; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + + last_token = token; + + field = malloc_or_die(sizeof(*field)); + memset(field, 0, sizeof(*field)); + + /* read the rest of the type */ + for (;;) { + type = read_token(&token); + if (type == EVENT_ITEM || + (type == EVENT_OP && strcmp(token, "*") == 0) || + /* + * Some of the ftrace fields are broken and have + * an illegal "." in them. + */ + (event->flags & EVENT_FL_ISFTRACE && + type == EVENT_OP && strcmp(token, ".") == 0)) { + + if (strcmp(token, "*") == 0) + field->flags |= FIELD_IS_POINTER; + + if (field->type) { + field->type = realloc(field->type, + strlen(field->type) + + strlen(last_token) + 2); + strcat(field->type, " "); + strcat(field->type, last_token); + } else + field->type = last_token; + last_token = token; + continue; + } + + break; + } + + if (!field->type) { + die("no type found"); + goto fail; + } + field->name = last_token; + + if (test_type(type, EVENT_OP)) + goto fail; + + if (strcmp(token, "[") == 0) { + enum event_type last_type = type; + char *brackets = token; + int len; + + field->flags |= FIELD_IS_ARRAY; + + type = read_token(&token); + while (strcmp(token, "]") != 0) { + if (last_type == EVENT_ITEM && + type == EVENT_ITEM) + len = 2; + else + len = 1; + last_type = type; + + brackets = realloc(brackets, + strlen(brackets) + + strlen(token) + len); + if (len == 2) + strcat(brackets, " "); + strcat(brackets, token); + free_token(token); + type = read_token(&token); + if (type == EVENT_NONE) { + die("failed to find token"); + goto fail; + } + } + + free_token(token); + + brackets = realloc(brackets, strlen(brackets) + 2); + strcat(brackets, "]"); + + /* add brackets to type */ + + type = read_token(&token); + /* + * If the next token is not an OP, then it is of + * the format: type [] item; + */ + if (type == EVENT_ITEM) { + field->type = realloc(field->type, + strlen(field->type) + + strlen(field->name) + + strlen(brackets) + 2); + strcat(field->type, " "); + strcat(field->type, field->name); + free_token(field->name); + strcat(field->type, brackets); + field->name = token; + type = read_token(&token); + } else { + field->type = realloc(field->type, + strlen(field->type) + + strlen(brackets) + 1); + strcat(field->type, brackets); + } + free(brackets); + } + + if (test_type_token(type, token, EVENT_OP, (char *)";")) + goto fail; + free_token(token); + + if (read_expected(EVENT_ITEM, (char *)"offset") < 0) + goto fail_expect; + + if (read_expected(EVENT_OP, (char *)":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + field->offset = strtoul(token, NULL, 0); + free_token(token); + + if (read_expected(EVENT_OP, (char *)";") < 0) + goto fail_expect; + + if (read_expected(EVENT_ITEM, (char *)"size") < 0) + goto fail_expect; + + if (read_expected(EVENT_OP, (char *)":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + field->size = strtoul(token, NULL, 0); + free_token(token); + + if (read_expected(EVENT_OP, (char *)";") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_NEWLINE, &token) < 0) + goto fail; + free_token(token); + + *fields = field; + fields = &field->next; + + } while (1); + + return 0; + +fail: + free_token(token); +fail_expect: + if (field) + free(field); + return -1; +} + +static int event_read_format(struct event *event) +{ + char *token; + int ret; + + if (read_expected_item(EVENT_ITEM, (char *)"format") < 0) + return -1; + + if (read_expected(EVENT_OP, (char *)":") < 0) + return -1; + + if (read_expect_type(EVENT_NEWLINE, &token)) + goto fail; + free_token(token); + + ret = event_read_fields(event, &event->format.common_fields); + if (ret < 0) + return ret; + event->format.nr_common = ret; + + ret = event_read_fields(event, &event->format.fields); + if (ret < 0) + return ret; + event->format.nr_fields = ret; + + return 0; + + fail: + free_token(token); + return -1; +} + +enum event_type +process_arg_token(struct event *event, struct print_arg *arg, + char **tok, enum event_type type); + +static enum event_type +process_arg(struct event *event, struct print_arg *arg, char **tok) +{ + enum event_type type; + char *token; + + type = read_token(&token); + *tok = token; + + return process_arg_token(event, arg, tok, type); +} + +static enum event_type +process_cond(struct event *event, struct print_arg *top, char **tok) +{ + struct print_arg *arg, *left, *right; + enum event_type type; + char *token = NULL; + + arg = malloc_or_die(sizeof(*arg)); + memset(arg, 0, sizeof(*arg)); + + left = malloc_or_die(sizeof(*left)); + + right = malloc_or_die(sizeof(*right)); + + arg->type = PRINT_OP; + arg->op.left = left; + arg->op.right = right; + + *tok = NULL; + type = process_arg(event, left, &token); + if (test_type_token(type, token, EVENT_OP, (char *)":")) + goto out_free; + + arg->op.op = token; + + type = process_arg(event, right, &token); + + top->op.right = arg; + + *tok = token; + return type; + +out_free: + free_token(*tok); + free(right); + free(left); + free_arg(arg); + return EVENT_ERROR; +} + +static int get_op_prio(char *op) +{ + if (!op[1]) { + switch (op[0]) { + case '*': + case '/': + case '%': + return 6; + case '+': + case '-': + return 7; + /* '>>' and '<<' are 8 */ + case '<': + case '>': + return 9; + /* '==' and '!=' are 10 */ + case '&': + return 11; + case '^': + return 12; + case '|': + return 13; + case '?': + return 16; + default: + die("unknown op '%c'", op[0]); + return -1; + } + } else { + if (strcmp(op, "++") == 0 || + strcmp(op, "--") == 0) { + return 3; + } else if (strcmp(op, ">>") == 0 || + strcmp(op, "<<") == 0) { + return 8; + } else if (strcmp(op, ">=") == 0 || + strcmp(op, "<=") == 0) { + return 9; + } else if (strcmp(op, "==") == 0 || + strcmp(op, "!=") == 0) { + return 10; + } else if (strcmp(op, "&&") == 0) { + return 14; + } else if (strcmp(op, "||") == 0) { + return 15; + } else { + die("unknown op '%s'", op); + return -1; + } + } +} + +static void set_op_prio(struct print_arg *arg) +{ + + /* single ops are the greatest */ + if (!arg->op.left || arg->op.left->type == PRINT_NULL) { + arg->op.prio = 0; + return; + } + + arg->op.prio = get_op_prio(arg->op.op); +} + +static enum event_type +process_op(struct event *event, struct print_arg *arg, char **tok) +{ + struct print_arg *left, *right = NULL; + enum event_type type; + char *token; + + /* the op is passed in via tok */ + token = *tok; + + if (arg->type == PRINT_OP && !arg->op.left) { + /* handle single op */ + if (token[1]) { + die("bad op token %s", token); + return EVENT_ERROR; + } + switch (token[0]) { + case '!': + case '+': + case '-': + break; + default: + die("bad op token %s", token); + return EVENT_ERROR; + } + + /* make an empty left */ + left = malloc_or_die(sizeof(*left)); + left->type = PRINT_NULL; + arg->op.left = left; + + right = malloc_or_die(sizeof(*right)); + arg->op.right = right; + + type = process_arg(event, right, tok); + + } else if (strcmp(token, "?") == 0) { + + left = malloc_or_die(sizeof(*left)); + /* copy the top arg to the left */ + *left = *arg; + + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = left; + arg->op.prio = 0; + + type = process_cond(event, arg, tok); + + } else if (strcmp(token, ">>") == 0 || + strcmp(token, "<<") == 0 || + strcmp(token, "&") == 0 || + strcmp(token, "|") == 0 || + strcmp(token, "&&") == 0 || + strcmp(token, "||") == 0 || + strcmp(token, "-") == 0 || + strcmp(token, "+") == 0 || + strcmp(token, "*") == 0 || + strcmp(token, "^") == 0 || + strcmp(token, "/") == 0 || + strcmp(token, "==") == 0 || + strcmp(token, "!=") == 0) { + + left = malloc_or_die(sizeof(*left)); + + /* copy the top arg to the left */ + *left = *arg; + + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = left; + + set_op_prio(arg); + + right = malloc_or_die(sizeof(*right)); + + type = process_arg(event, right, tok); + + arg->op.right = right; + + } else { + die("unknown op '%s'", token); + /* the arg is now the left side */ + return EVENT_NONE; + } + + + if (type == EVENT_OP) { + int prio; + + /* higher prios need to be closer to the root */ + prio = get_op_prio(*tok); + + if (prio > arg->op.prio) + return process_op(event, arg, tok); + + return process_op(event, right, tok); + } + + return type; +} + +static enum event_type +process_entry(struct event *event __unused, struct print_arg *arg, + char **tok) +{ + enum event_type type; + char *field; + char *token; + + if (read_expected(EVENT_OP, (char *)"->") < 0) + return EVENT_ERROR; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + field = token; + + arg->type = PRINT_FIELD; + arg->field.name = field; + + type = read_token(&token); + *tok = token; + + return type; + +fail: + free_token(token); + return EVENT_ERROR; +} + +static char *arg_eval (struct print_arg *arg); + +static long long arg_num_eval(struct print_arg *arg) +{ + long long left, right; + long long val = 0; + + switch (arg->type) { + case PRINT_ATOM: + val = strtoll(arg->atom.atom, NULL, 0); + break; + case PRINT_TYPE: + val = arg_num_eval(arg->typecast.item); + break; + case PRINT_OP: + switch (arg->op.op[0]) { + case '|': + left = arg_num_eval(arg->op.left); + right = arg_num_eval(arg->op.right); + if (arg->op.op[1]) + val = left || right; + else + val = left | right; + break; + case '&': + left = arg_num_eval(arg->op.left); + right = arg_num_eval(arg->op.right); + if (arg->op.op[1]) + val = left && right; + else + val = left & right; + break; + case '<': + left = arg_num_eval(arg->op.left); + right = arg_num_eval(arg->op.right); + switch (arg->op.op[1]) { + case 0: + val = left < right; + break; + case '<': + val = left << right; + break; + case '=': + val = left <= right; + break; + default: + die("unknown op '%s'", arg->op.op); + } + break; + case '>': + left = arg_num_eval(arg->op.left); + right = arg_num_eval(arg->op.right); + switch (arg->op.op[1]) { + case 0: + val = left > right; + break; + case '>': + val = left >> right; + break; + case '=': + val = left >= right; + break; + default: + die("unknown op '%s'", arg->op.op); + } + break; + case '=': + left = arg_num_eval(arg->op.left); + right = arg_num_eval(arg->op.right); + + if (arg->op.op[1] != '=') + die("unknown op '%s'", arg->op.op); + + val = left == right; + break; + case '!': + left = arg_num_eval(arg->op.left); + right = arg_num_eval(arg->op.right); + + switch (arg->op.op[1]) { + case '=': + val = left != right; + break; + default: + die("unknown op '%s'", arg->op.op); + } + break; + default: + die("unknown op '%s'", arg->op.op); + } + break; + + case PRINT_NULL: + case PRINT_FIELD ... PRINT_SYMBOL: + case PRINT_STRING: + default: + die("invalid eval type %d", arg->type); + + } + return val; +} + +static char *arg_eval (struct print_arg *arg) +{ + long long val; + static char buf[20]; + + switch (arg->type) { + case PRINT_ATOM: + return arg->atom.atom; + case PRINT_TYPE: + return arg_eval(arg->typecast.item); + case PRINT_OP: + val = arg_num_eval(arg); + sprintf(buf, "%lld", val); + return buf; + + case PRINT_NULL: + case PRINT_FIELD ... PRINT_SYMBOL: + case PRINT_STRING: + default: + die("invalid eval type %d", arg->type); + break; + } + + return NULL; +} + +static enum event_type +process_fields(struct event *event, struct print_flag_sym **list, char **tok) +{ + enum event_type type; + struct print_arg *arg = NULL; + struct print_flag_sym *field; + char *token = NULL; + char *value; + + do { + free_token(token); + type = read_token_item(&token); + if (test_type_token(type, token, EVENT_OP, (char *)"{")) + break; + + arg = malloc_or_die(sizeof(*arg)); + + free_token(token); + type = process_arg(event, arg, &token); + if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + goto out_free; + + field = malloc_or_die(sizeof(*field)); + memset(field, 0, sizeof(field)); + + value = arg_eval(arg); + field->value = strdup(value); + + free_token(token); + type = process_arg(event, arg, &token); + if (test_type_token(type, token, EVENT_OP, (char *)"}")) + goto out_free; + + value = arg_eval(arg); + field->str = strdup(value); + free_arg(arg); + arg = NULL; + + *list = field; + list = &field->next; + + free_token(token); + type = read_token_item(&token); + } while (type == EVENT_DELIM && strcmp(token, ",") == 0); + + *tok = token; + return type; + +out_free: + free_arg(arg); + free_token(token); + + return EVENT_ERROR; +} + +static enum event_type +process_flags(struct event *event, struct print_arg *arg, char **tok) +{ + struct print_arg *field; + enum event_type type; + char *token; + + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_FLAGS; + + if (read_expected_item(EVENT_DELIM, (char *)"(") < 0) + return EVENT_ERROR; + + field = malloc_or_die(sizeof(*field)); + + type = process_arg(event, field, &token); + if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + goto out_free; + + arg->flags.field = field; + + type = read_token_item(&token); + if (event_item_type(type)) { + arg->flags.delim = token; + type = read_token_item(&token); + } + + if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + goto out_free; + + type = process_fields(event, &arg->flags.flags, &token); + if (test_type_token(type, token, EVENT_DELIM, (char *)")")) + goto out_free; + + free_token(token); + type = read_token_item(tok); + return type; + +out_free: + free_token(token); + return EVENT_ERROR; +} + +static enum event_type +process_symbols(struct event *event, struct print_arg *arg, char **tok) +{ + struct print_arg *field; + enum event_type type; + char *token; + + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_SYMBOL; + + if (read_expected_item(EVENT_DELIM, (char *)"(") < 0) + return EVENT_ERROR; + + field = malloc_or_die(sizeof(*field)); + + type = process_arg(event, field, &token); + if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + goto out_free; + + arg->symbol.field = field; + + type = process_fields(event, &arg->symbol.symbols, &token); + if (test_type_token(type, token, EVENT_DELIM, (char *)")")) + goto out_free; + + free_token(token); + type = read_token_item(tok); + return type; + +out_free: + free_token(token); + return EVENT_ERROR; +} + +static enum event_type +process_paren(struct event *event, struct print_arg *arg, char **tok) +{ + struct print_arg *item_arg; + enum event_type type; + char *token; + + type = process_arg(event, arg, &token); + + if (type == EVENT_ERROR) + return EVENT_ERROR; + + if (type == EVENT_OP) + type = process_op(event, arg, &token); + + if (type == EVENT_ERROR) + return EVENT_ERROR; + + if (test_type_token(type, token, EVENT_DELIM, (char *)")")) { + free_token(token); + return EVENT_ERROR; + } + + free_token(token); + type = read_token_item(&token); + + /* + * If the next token is an item or another open paren, then + * this was a typecast. + */ + if (event_item_type(type) || + (type == EVENT_DELIM && strcmp(token, "(") == 0)) { + + /* make this a typecast and contine */ + + /* prevous must be an atom */ + if (arg->type != PRINT_ATOM) + die("previous needed to be PRINT_ATOM"); + + item_arg = malloc_or_die(sizeof(*item_arg)); + + arg->type = PRINT_TYPE; + arg->typecast.type = arg->atom.atom; + arg->typecast.item = item_arg; + type = process_arg_token(event, item_arg, &token, type); + + } + + *tok = token; + return type; +} + + +static enum event_type +process_str(struct event *event __unused, struct print_arg *arg, char **tok) +{ + enum event_type type; + char *token; + + if (read_expected(EVENT_DELIM, (char *)"(") < 0) + return EVENT_ERROR; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + + arg->type = PRINT_STRING; + arg->string.string = token; + + if (read_expected(EVENT_DELIM, (char *)")") < 0) + return EVENT_ERROR; + + type = read_token(&token); + *tok = token; + + return type; +fail: + free_token(token); + return EVENT_ERROR; +} + +enum event_type +process_arg_token(struct event *event, struct print_arg *arg, + char **tok, enum event_type type) +{ + char *token; + char *atom; + + token = *tok; + + switch (type) { + case EVENT_ITEM: + if (strcmp(token, "REC") == 0) { + free_token(token); + type = process_entry(event, arg, &token); + } else if (strcmp(token, "__print_flags") == 0) { + free_token(token); + type = process_flags(event, arg, &token); + } else if (strcmp(token, "__print_symbolic") == 0) { + free_token(token); + type = process_symbols(event, arg, &token); + } else if (strcmp(token, "__get_str") == 0) { + free_token(token); + type = process_str(event, arg, &token); + } else { + atom = token; + /* test the next token */ + type = read_token_item(&token); + + /* atoms can be more than one token long */ + while (type == EVENT_ITEM) { + atom = realloc(atom, strlen(atom) + strlen(token) + 2); + strcat(atom, " "); + strcat(atom, token); + free_token(token); + type = read_token_item(&token); + } + + /* todo, test for function */ + + arg->type = PRINT_ATOM; + arg->atom.atom = atom; + } + break; + case EVENT_DQUOTE: + case EVENT_SQUOTE: + arg->type = PRINT_ATOM; + arg->atom.atom = token; + type = read_token_item(&token); + break; + case EVENT_DELIM: + if (strcmp(token, "(") == 0) { + free_token(token); + type = process_paren(event, arg, &token); + break; + } + case EVENT_OP: + /* handle single ops */ + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = NULL; + type = process_op(event, arg, &token); + + break; + + case EVENT_ERROR ... EVENT_NEWLINE: + default: + die("unexpected type %d", type); + } + *tok = token; + + return type; +} + +static int event_read_print_args(struct event *event, struct print_arg **list) +{ + enum event_type type; + struct print_arg *arg; + char *token; + int args = 0; + + do { + arg = malloc_or_die(sizeof(*arg)); + memset(arg, 0, sizeof(*arg)); + + type = process_arg(event, arg, &token); + + if (type == EVENT_ERROR) { + free_arg(arg); + return -1; + } + + *list = arg; + args++; + + if (type == EVENT_OP) { + type = process_op(event, arg, &token); + list = &arg->next; + continue; + } + + if (type == EVENT_DELIM && strcmp(token, ",") == 0) { + free_token(token); + *list = arg; + list = &arg->next; + continue; + } + break; + } while (type != EVENT_NONE); + + if (type != EVENT_NONE) + free_token(token); + + return args; +} + +static int event_read_print(struct event *event) +{ + enum event_type type; + char *token; + int ret; + + if (read_expected_item(EVENT_ITEM, (char *)"print") < 0) + return -1; + + if (read_expected(EVENT_ITEM, (char *)"fmt") < 0) + return -1; + + if (read_expected(EVENT_OP, (char *)":") < 0) + return -1; + + if (read_expect_type(EVENT_DQUOTE, &token) < 0) + goto fail; + + event->print_fmt.format = token; + event->print_fmt.args = NULL; + + /* ok to have no arg */ + type = read_token_item(&token); + + if (type == EVENT_NONE) + return 0; + + if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + goto fail; + + free_token(token); + + ret = event_read_print_args(event, &event->print_fmt.args); + if (ret < 0) + return -1; + + return 0; + + fail: + free_token(token); + return -1; +} + +static struct format_field * +find_common_field(struct event *event, const char *name) +{ + struct format_field *format; + + for (format = event->format.common_fields; + format; format = format->next) { + if (strcmp(format->name, name) == 0) + break; + } + + return format; +} + +static struct format_field * +find_field(struct event *event, const char *name) +{ + struct format_field *format; + + for (format = event->format.fields; + format; format = format->next) { + if (strcmp(format->name, name) == 0) + break; + } + + return format; +} + +static struct format_field * +find_any_field(struct event *event, const char *name) +{ + struct format_field *format; + + format = find_common_field(event, name); + if (format) + return format; + return find_field(event, name); +} + +static unsigned long long read_size(void *ptr, int size) +{ + switch (size) { + case 1: + return *(unsigned char *)ptr; + case 2: + return data2host2(ptr); + case 4: + return data2host4(ptr); + case 8: + return data2host8(ptr); + default: + /* BUG! */ + return 0; + } +} + +static int get_common_info(const char *type, int *offset, int *size) +{ + struct event *event; + struct format_field *field; + + /* + * All events should have the same common elements. + * Pick any event to find where the type is; + */ + if (!event_list) + die("no event_list!"); + + event = event_list; + field = find_common_field(event, type); + if (!field) + die("field '%s' not found", type); + + *offset = field->offset; + *size = field->size; + + return 0; +} + +static int parse_common_type(void *data) +{ + static int type_offset; + static int type_size; + int ret; + + if (!type_size) { + ret = get_common_info("common_type", + &type_offset, + &type_size); + if (ret < 0) + return ret; + } + return read_size(data + type_offset, type_size); +} + +static int parse_common_pid(void *data) +{ + static int pid_offset; + static int pid_size; + int ret; + + if (!pid_size) { + ret = get_common_info("common_pid", + &pid_offset, + &pid_size); + if (ret < 0) + return ret; + } + + return read_size(data + pid_offset, pid_size); +} + +static struct event *find_event(int id) +{ + struct event *event; + + for (event = event_list; event; event = event->next) { + if (event->id == id) + break; + } + return event; +} + +static unsigned long long eval_num_arg(void *data, int size, + struct event *event, struct print_arg *arg) +{ + unsigned long long val = 0; + unsigned long long left, right; + + switch (arg->type) { + case PRINT_NULL: + /* ?? */ + return 0; + case PRINT_ATOM: + return strtoull(arg->atom.atom, NULL, 0); + case PRINT_FIELD: + if (!arg->field.field) { + arg->field.field = find_any_field(event, arg->field.name); + if (!arg->field.field) + die("field %s not found", arg->field.name); + } + /* must be a number */ + val = read_size(data + arg->field.field->offset, + arg->field.field->size); + break; + case PRINT_FLAGS: + case PRINT_SYMBOL: + break; + case PRINT_TYPE: + return eval_num_arg(data, size, event, arg->typecast.item); + case PRINT_STRING: + return 0; + break; + case PRINT_OP: + left = eval_num_arg(data, size, event, arg->op.left); + right = eval_num_arg(data, size, event, arg->op.right); + switch (arg->op.op[0]) { + case '|': + if (arg->op.op[1]) + val = left || right; + else + val = left | right; + break; + case '&': + if (arg->op.op[1]) + val = left && right; + else + val = left & right; + break; + case '<': + switch (arg->op.op[1]) { + case 0: + val = left < right; + break; + case '<': + val = left << right; + break; + case '=': + val = left <= right; + break; + default: + die("unknown op '%s'", arg->op.op); + } + break; + case '>': + switch (arg->op.op[1]) { + case 0: + val = left > right; + break; + case '>': + val = left >> right; + break; + case '=': + val = left >= right; + break; + default: + die("unknown op '%s'", arg->op.op); + } + break; + case '=': + if (arg->op.op[1] != '=') + die("unknown op '%s'", arg->op.op); + val = left == right; + break; + default: + die("unknown op '%s'", arg->op.op); + } + break; + default: /* not sure what to do there */ + return 0; + } + return val; +} + +struct flag { + const char *name; + unsigned long long value; +}; + +static const struct flag flags[] = { + { "HI_SOFTIRQ", 0 }, + { "TIMER_SOFTIRQ", 1 }, + { "NET_TX_SOFTIRQ", 2 }, + { "NET_RX_SOFTIRQ", 3 }, + { "BLOCK_SOFTIRQ", 4 }, + { "TASKLET_SOFTIRQ", 5 }, + { "SCHED_SOFTIRQ", 6 }, + { "HRTIMER_SOFTIRQ", 7 }, + { "RCU_SOFTIRQ", 8 }, + + { "HRTIMER_NORESTART", 0 }, + { "HRTIMER_RESTART", 1 }, +}; + +static unsigned long long eval_flag(const char *flag) +{ + int i; + + /* + * Some flags in the format files do not get converted. + * If the flag is not numeric, see if it is something that + * we already know about. + */ + if (isdigit(flag[0])) + return strtoull(flag, NULL, 0); + + for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++) + if (strcmp(flags[i].name, flag) == 0) + return flags[i].value; + + return 0; +} + +static void print_str_arg(void *data, int size, + struct event *event, struct print_arg *arg) +{ + struct print_flag_sym *flag; + unsigned long long val, fval; + char *str; + int print; + + switch (arg->type) { + case PRINT_NULL: + /* ?? */ + return; + case PRINT_ATOM: + printf("%s", arg->atom.atom); + return; + case PRINT_FIELD: + if (!arg->field.field) { + arg->field.field = find_any_field(event, arg->field.name); + if (!arg->field.field) + die("field %s not found", arg->field.name); + } + str = malloc_or_die(arg->field.field->size + 1); + memcpy(str, data + arg->field.field->offset, + arg->field.field->size); + str[arg->field.field->size] = 0; + free(str); + break; + case PRINT_FLAGS: + val = eval_num_arg(data, size, event, arg->flags.field); + print = 0; + for (flag = arg->flags.flags; flag; flag = flag->next) { + fval = eval_flag(flag->value); + if (!val && !fval) { + printf("%s", flag->str); + break; + } + if (fval && (val & fval) == fval) { + if (print && arg->flags.delim) + printf("%s", arg->flags.delim); + printf("%s", flag->str); + print = 1; + val &= ~fval; + } + } + break; + case PRINT_SYMBOL: + val = eval_num_arg(data, size, event, arg->symbol.field); + for (flag = arg->symbol.symbols; flag; flag = flag->next) { + fval = eval_flag(flag->value); + if (val == fval) { + printf("%s", flag->str); + break; + } + } + break; + + case PRINT_TYPE: + break; + case PRINT_STRING: + printf("%s", arg->string.string); + break; + case PRINT_OP: + /* + * The only op for string should be ? : + */ + if (arg->op.op[0] != '?') + return; + val = eval_num_arg(data, size, event, arg->op.left); + if (val) + print_str_arg(data, size, event, arg->op.right->op.left); + else + print_str_arg(data, size, event, arg->op.right->op.right); + break; + default: + /* well... */ + break; + } +} + +static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struct event *event) +{ + static struct format_field *field, *ip_field; + struct print_arg *args, *arg, **next; + unsigned long long ip, val; + char *ptr; + void *bptr; + + if (!field) { + field = find_field(event, "buf"); + if (!field) + die("can't find buffer field for binary printk"); + ip_field = find_field(event, "ip"); + if (!ip_field) + die("can't find ip field for binary printk"); + } + + ip = read_size(data + ip_field->offset, ip_field->size); + + /* + * The first arg is the IP pointer. + */ + args = malloc_or_die(sizeof(*args)); + arg = args; + arg->next = NULL; + next = &arg->next; + + arg->type = PRINT_ATOM; + arg->atom.atom = malloc_or_die(32); + sprintf(arg->atom.atom, "%lld", ip); + + /* skip the first "%pf : " */ + for (ptr = fmt + 6, bptr = data + field->offset; + bptr < data + size && *ptr; ptr++) { + int ls = 0; + + if (*ptr == '%') { + process_again: + ptr++; + switch (*ptr) { + case '%': + break; + case 'l': + ls++; + goto process_again; + case 'L': + ls = 2; + goto process_again; + case '0' ... '9': + goto process_again; + case 'p': + ls = 1; + /* fall through */ + case 'd': + case 'u': + case 'x': + case 'i': + bptr = (void *)(((unsigned long)bptr + (long_size - 1)) & + ~(long_size - 1)); + switch (ls) { + case 0: + case 1: + ls = long_size; + break; + case 2: + ls = 8; + default: + break; + } + val = read_size(bptr, ls); + bptr += ls; + arg = malloc_or_die(sizeof(*arg)); + arg->next = NULL; + arg->type = PRINT_ATOM; + arg->atom.atom = malloc_or_die(32); + sprintf(arg->atom.atom, "%lld", val); + *next = arg; + next = &arg->next; + break; + case 's': + arg = malloc_or_die(sizeof(*arg)); + arg->next = NULL; + arg->type = PRINT_STRING; + arg->string.string = strdup(bptr); + bptr += strlen(bptr) + 1; + *next = arg; + next = &arg->next; + default: + break; + } + } + } + + return args; +} + +static void free_args(struct print_arg *args) +{ + struct print_arg *next; + + while (args) { + next = args->next; + + if (args->type == PRINT_ATOM) + free(args->atom.atom); + else + free(args->string.string); + free(args); + args = next; + } +} + +static char *get_bprint_format(void *data, int size __unused, struct event *event) +{ + unsigned long long addr; + static struct format_field *field; + struct printk_map *printk; + char *format; + char *p; + + if (!field) { + field = find_field(event, "fmt"); + if (!field) + die("can't find format field for binary printk"); + printf("field->offset = %d size=%d\n", field->offset, field->size); + } + + addr = read_size(data + field->offset, field->size); + + printk = find_printk(addr); + if (!printk) { + format = malloc_or_die(45); + sprintf(format, "%%pf : (NO FORMAT FOUND at %llx)\n", + addr); + return format; + } + + p = printk->printk; + /* Remove any quotes. */ + if (*p == '"') + p++; + format = malloc_or_die(strlen(p) + 10); + sprintf(format, "%s : %s", "%pf", p); + /* remove ending quotes and new line since we will add one too */ + p = format + strlen(format) - 1; + if (*p == '"') + *p = 0; + + p -= 2; + if (strcmp(p, "\\n") == 0) + *p = 0; + + return format; +} + +static void pretty_print(void *data, int size, struct event *event) +{ + struct print_fmt *print_fmt = &event->print_fmt; + struct print_arg *arg = print_fmt->args; + struct print_arg *args = NULL; + const char *ptr = print_fmt->format; + unsigned long long val; + struct func_map *func; + const char *saveptr; + char *bprint_fmt = NULL; + char format[32]; + int show_func; + int len; + int ls; + + if (event->flags & EVENT_FL_ISFUNC) + ptr = " %pF <-- %pF"; + + if (event->flags & EVENT_FL_ISBPRINT) { + bprint_fmt = get_bprint_format(data, size, event); + args = make_bprint_args(bprint_fmt, data, size, event); + arg = args; + ptr = bprint_fmt; + } + + for (; *ptr; ptr++) { + ls = 0; + if (*ptr == '%') { + saveptr = ptr; + show_func = 0; + cont_process: + ptr++; + switch (*ptr) { + case '%': + printf("%%"); + break; + case 'l': + ls++; + goto cont_process; + case 'L': + ls = 2; + goto cont_process; + case 'z': + case 'Z': + case '0' ... '9': + goto cont_process; + case 'p': + if (long_size == 4) + ls = 1; + else + ls = 2; + + if (*(ptr+1) == 'F' || + *(ptr+1) == 'f') { + ptr++; + show_func = *ptr; + } + + /* fall through */ + case 'd': + case 'i': + case 'x': + case 'X': + case 'u': + if (!arg) + die("no argument match"); + + len = ((unsigned long)ptr + 1) - + (unsigned long)saveptr; + + /* should never happen */ + if (len > 32) + die("bad format!"); + + memcpy(format, saveptr, len); + format[len] = 0; + + val = eval_num_arg(data, size, event, arg); + arg = arg->next; + + if (show_func) { + func = find_func(val); + if (func) { + printf("%s", func->func); + if (show_func == 'F') + printf("+0x%llx", + val - func->addr); + break; + } + } + switch (ls) { + case 0: + printf(format, (int)val); + break; + case 1: + printf(format, (long)val); + break; + case 2: + printf(format, (long long)val); + break; + default: + die("bad count (%d)", ls); + } + break; + case 's': + if (!arg) + die("no matching argument"); + + print_str_arg(data, size, event, arg); + arg = arg->next; + break; + default: + printf(">%c<", *ptr); + + } + } else + printf("%c", *ptr); + } + + if (args) { + free_args(args); + free(bprint_fmt); + } +} + +static inline int log10_cpu(int nb) +{ + if (nb / 100) + return 3; + if (nb / 10) + return 2; + return 1; +} + +/* taken from Linux, written by Frederic Weisbecker */ +static void print_graph_cpu(int cpu) +{ + int i; + int log10_this = log10_cpu(cpu); + int log10_all = log10_cpu(cpus); + + + /* + * Start with a space character - to make it stand out + * to the right a bit when trace output is pasted into + * email: + */ + printf(" "); + + /* + * Tricky - we space the CPU field according to the max + * number of online CPUs. On a 2-cpu system it would take + * a maximum of 1 digit - on a 128 cpu system it would + * take up to 3 digits: + */ + for (i = 0; i < log10_all - log10_this; i++) + printf(" "); + + printf("%d) ", cpu); +} + +#define TRACE_GRAPH_PROCINFO_LENGTH 14 +#define TRACE_GRAPH_INDENT 2 + +static void print_graph_proc(int pid, const char *comm) +{ + /* sign + log10(MAX_INT) + '\0' */ + char pid_str[11]; + int spaces = 0; + int len; + int i; + + sprintf(pid_str, "%d", pid); + + /* 1 stands for the "-" character */ + len = strlen(comm) + strlen(pid_str) + 1; + + if (len < TRACE_GRAPH_PROCINFO_LENGTH) + spaces = TRACE_GRAPH_PROCINFO_LENGTH - len; + + /* First spaces to align center */ + for (i = 0; i < spaces / 2; i++) + printf(" "); + + printf("%s-%s", comm, pid_str); + + /* Last spaces to align center */ + for (i = 0; i < spaces - (spaces / 2); i++) + printf(" "); +} + +static struct record * +get_return_for_leaf(int cpu, int cur_pid, unsigned long long cur_func, + struct record *next) +{ + struct format_field *field; + struct event *event; + unsigned long val; + int type; + int pid; + + type = parse_common_type(next->data); + event = find_event(type); + if (!event) + return NULL; + + if (!(event->flags & EVENT_FL_ISFUNCRET)) + return NULL; + + pid = parse_common_pid(next->data); + field = find_field(event, "func"); + if (!field) + die("function return does not have field func"); + + val = read_size(next->data + field->offset, field->size); + + if (cur_pid != pid || cur_func != val) + return NULL; + + /* this is a leaf, now advance the iterator */ + return trace_read_data(cpu); +} + +/* Signal a overhead of time execution to the output */ +static void print_graph_overhead(unsigned long long duration) +{ + /* Non nested entry or return */ + if (duration == ~0ULL) + return (void)printf(" "); + + /* Duration exceeded 100 msecs */ + if (duration > 100000ULL) + return (void)printf("! "); + + /* Duration exceeded 10 msecs */ + if (duration > 10000ULL) + return (void)printf("+ "); + + printf(" "); +} + +static void print_graph_duration(unsigned long long duration) +{ + unsigned long usecs = duration / 1000; + unsigned long nsecs_rem = duration % 1000; + /* log10(ULONG_MAX) + '\0' */ + char msecs_str[21]; + char nsecs_str[5]; + int len; + int i; + + sprintf(msecs_str, "%lu", usecs); + + /* Print msecs */ + len = printf("%lu", usecs); + + /* Print nsecs (we don't want to exceed 7 numbers) */ + if (len < 7) { + snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); + len += printf(".%s", nsecs_str); + } + + printf(" us "); + + /* Print remaining spaces to fit the row's width */ + for (i = len; i < 7; i++) + printf(" "); + + printf("| "); +} + +static void +print_graph_entry_leaf(struct event *event, void *data, struct record *ret_rec) +{ + unsigned long long rettime, calltime; + unsigned long long duration, depth; + unsigned long long val; + struct format_field *field; + struct func_map *func; + struct event *ret_event; + int type; + int i; + + type = parse_common_type(ret_rec->data); + ret_event = find_event(type); + + field = find_field(ret_event, "rettime"); + if (!field) + die("can't find rettime in return graph"); + rettime = read_size(ret_rec->data + field->offset, field->size); + + field = find_field(ret_event, "calltime"); + if (!field) + die("can't find rettime in return graph"); + calltime = read_size(ret_rec->data + field->offset, field->size); + + duration = rettime - calltime; + + /* Overhead */ + print_graph_overhead(duration); + + /* Duration */ + print_graph_duration(duration); + + field = find_field(event, "depth"); + if (!field) + die("can't find depth in entry graph"); + depth = read_size(data + field->offset, field->size); + + /* Function */ + for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) + printf(" "); + + field = find_field(event, "func"); + if (!field) + die("can't find func in entry graph"); + val = read_size(data + field->offset, field->size); + func = find_func(val); + + if (func) + printf("%s();", func->func); + else + printf("%llx();", val); +} + +static void print_graph_nested(struct event *event, void *data) +{ + struct format_field *field; + unsigned long long depth; + unsigned long long val; + struct func_map *func; + int i; + + /* No overhead */ + print_graph_overhead(-1); + + /* No time */ + printf(" | "); + + field = find_field(event, "depth"); + if (!field) + die("can't find depth in entry graph"); + depth = read_size(data + field->offset, field->size); + + /* Function */ + for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) + printf(" "); + + field = find_field(event, "func"); + if (!field) + die("can't find func in entry graph"); + val = read_size(data + field->offset, field->size); + func = find_func(val); + + if (func) + printf("%s() {", func->func); + else + printf("%llx() {", val); +} + +static void +pretty_print_func_ent(void *data, int size, struct event *event, + int cpu, int pid, const char *comm, + unsigned long secs, unsigned long usecs) +{ + struct format_field *field; + struct record *rec; + void *copy_data; + unsigned long val; + + printf("%5lu.%06lu | ", secs, usecs); + + print_graph_cpu(cpu); + print_graph_proc(pid, comm); + + printf(" | "); + + field = find_field(event, "func"); + if (!field) + die("function entry does not have func field"); + + val = read_size(data + field->offset, field->size); + + /* + * peek_data may unmap the data pointer. Copy it first. + */ + copy_data = malloc_or_die(size); + memcpy(copy_data, data, size); + data = copy_data; + + rec = trace_peek_data(cpu); + if (rec) { + rec = get_return_for_leaf(cpu, pid, val, rec); + if (rec) { + print_graph_entry_leaf(event, data, rec); + goto out_free; + } + } + print_graph_nested(event, data); +out_free: + free(data); +} + +static void +pretty_print_func_ret(void *data, int size __unused, struct event *event, + int cpu, int pid, const char *comm, + unsigned long secs, unsigned long usecs) +{ + unsigned long long rettime, calltime; + unsigned long long duration, depth; + struct format_field *field; + int i; + + printf("%5lu.%06lu | ", secs, usecs); + + print_graph_cpu(cpu); + print_graph_proc(pid, comm); + + printf(" | "); + + field = find_field(event, "rettime"); + if (!field) + die("can't find rettime in return graph"); + rettime = read_size(data + field->offset, field->size); + + field = find_field(event, "calltime"); + if (!field) + die("can't find calltime in return graph"); + calltime = read_size(data + field->offset, field->size); + + duration = rettime - calltime; + + /* Overhead */ + print_graph_overhead(duration); + + /* Duration */ + print_graph_duration(duration); + + field = find_field(event, "depth"); + if (!field) + die("can't find depth in entry graph"); + depth = read_size(data + field->offset, field->size); + + /* Function */ + for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) + printf(" "); + + printf("}"); +} + +static void +pretty_print_func_graph(void *data, int size, struct event *event, + int cpu, int pid, const char *comm, + unsigned long secs, unsigned long usecs) +{ + if (event->flags & EVENT_FL_ISFUNCENT) + pretty_print_func_ent(data, size, event, + cpu, pid, comm, secs, usecs); + else if (event->flags & EVENT_FL_ISFUNCRET) + pretty_print_func_ret(data, size, event, + cpu, pid, comm, secs, usecs); + printf("\n"); +} + +void print_event(int cpu, void *data, int size, unsigned long long nsecs, + char *comm) +{ + struct event *event; + unsigned long secs; + unsigned long usecs; + int type; + int pid; + + secs = nsecs / NSECS_PER_SEC; + nsecs -= secs * NSECS_PER_SEC; + usecs = nsecs / NSECS_PER_USEC; + + type = parse_common_type(data); + + event = find_event(type); + if (!event) + die("ug! no event found for type %d", type); + + pid = parse_common_pid(data); + + if (event->flags & (EVENT_FL_ISFUNCENT | EVENT_FL_ISFUNCRET)) + return pretty_print_func_graph(data, size, event, cpu, + pid, comm, secs, usecs); + + printf("%16s-%-5d [%03d] %5lu.%06lu: %s: ", + comm, pid, cpu, + secs, usecs, event->name); + + pretty_print(data, size, event); + printf("\n"); +} + +static void print_fields(struct print_flag_sym *field) +{ + printf("{ %s, %s }", field->value, field->str); + if (field->next) { + printf(", "); + print_fields(field->next); + } +} + +static void print_args(struct print_arg *args) +{ + int print_paren = 1; + + switch (args->type) { + case PRINT_NULL: + printf("null"); + break; + case PRINT_ATOM: + printf("%s", args->atom.atom); + break; + case PRINT_FIELD: + printf("REC->%s", args->field.name); + break; + case PRINT_FLAGS: + printf("__print_flags("); + print_args(args->flags.field); + printf(", %s, ", args->flags.delim); + print_fields(args->flags.flags); + printf(")"); + break; + case PRINT_SYMBOL: + printf("__print_symbolic("); + print_args(args->symbol.field); + printf(", "); + print_fields(args->symbol.symbols); + printf(")"); + break; + case PRINT_STRING: + printf("__get_str(%s)", args->string.string); + break; + case PRINT_TYPE: + printf("(%s)", args->typecast.type); + print_args(args->typecast.item); + break; + case PRINT_OP: + if (strcmp(args->op.op, ":") == 0) + print_paren = 0; + if (print_paren) + printf("("); + print_args(args->op.left); + printf(" %s ", args->op.op); + print_args(args->op.right); + if (print_paren) + printf(")"); + break; + default: + /* we should warn... */ + return; + } + if (args->next) { + printf("\n"); + print_args(args->next); + } +} + +static void parse_header_field(char *type, + int *offset, int *size) +{ + char *token; + + if (read_expected(EVENT_ITEM, (char *)"field") < 0) + return; + if (read_expected(EVENT_OP, (char *)":") < 0) + return; + /* type */ + if (read_expect_type(EVENT_ITEM, &token) < 0) + return; + free_token(token); + + if (read_expected(EVENT_ITEM, type) < 0) + return; + if (read_expected(EVENT_OP, (char *)";") < 0) + return; + if (read_expected(EVENT_ITEM, (char *)"offset") < 0) + return; + if (read_expected(EVENT_OP, (char *)":") < 0) + return; + if (read_expect_type(EVENT_ITEM, &token) < 0) + return; + *offset = atoi(token); + free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) + return; + if (read_expected(EVENT_ITEM, (char *)"size") < 0) + return; + if (read_expected(EVENT_OP, (char *)":") < 0) + return; + if (read_expect_type(EVENT_ITEM, &token) < 0) + return; + *size = atoi(token); + free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) + return; + if (read_expect_type(EVENT_NEWLINE, &token) < 0) + return; + free_token(token); +} + +int parse_header_page(char *buf, unsigned long size) +{ + init_input_buf(buf, size); + + parse_header_field((char *)"timestamp", &header_page_ts_offset, + &header_page_ts_size); + parse_header_field((char *)"commit", &header_page_size_offset, + &header_page_size_size); + parse_header_field((char *)"data", &header_page_data_offset, + &header_page_data_size); + + return 0; +} + +int parse_ftrace_file(char *buf, unsigned long size) +{ + struct format_field *field; + struct print_arg *arg, **list; + struct event *event; + int ret; + + init_input_buf(buf, size); + + event = alloc_event(); + if (!event) + return -ENOMEM; + + event->flags |= EVENT_FL_ISFTRACE; + + event->name = event_read_name(); + if (!event->name) + die("failed to read ftrace event name"); + + if (strcmp(event->name, "function") == 0) + event->flags |= EVENT_FL_ISFUNC; + + else if (strcmp(event->name, "funcgraph_entry") == 0) + event->flags |= EVENT_FL_ISFUNCENT; + + else if (strcmp(event->name, "funcgraph_exit") == 0) + event->flags |= EVENT_FL_ISFUNCRET; + + else if (strcmp(event->name, "bprint") == 0) + event->flags |= EVENT_FL_ISBPRINT; + + event->id = event_read_id(); + if (event->id < 0) + die("failed to read ftrace event id"); + + add_event(event); + + ret = event_read_format(event); + if (ret < 0) + die("failed to read ftrace event format"); + + ret = event_read_print(event); + if (ret < 0) + die("failed to read ftrace event print fmt"); + + /* + * The arguments for ftrace files are parsed by the fields. + * Set up the fields as their arguments. + */ + list = &event->print_fmt.args; + for (field = event->format.fields; field; field = field->next) { + arg = malloc_or_die(sizeof(*arg)); + memset(arg, 0, sizeof(*arg)); + *list = arg; + list = &arg->next; + arg->type = PRINT_FIELD; + arg->field.name = field->name; + arg->field.field = field; + } + return 0; +} + +int parse_event_file(char *buf, unsigned long size, char *system__unused __unused) +{ + struct event *event; + int ret; + + init_input_buf(buf, size); + + event = alloc_event(); + if (!event) + return -ENOMEM; + + event->name = event_read_name(); + if (!event->name) + die("failed to read event name"); + + event->id = event_read_id(); + if (event->id < 0) + die("failed to read event id"); + + ret = event_read_format(event); + if (ret < 0) + die("failed to read event format"); + + ret = event_read_print(event); + if (ret < 0) + die("failed to read event print fmt"); + +#define PRINT_ARGS 0 + if (PRINT_ARGS && event->print_fmt.args) + print_args(event->print_fmt.args); + + add_event(event); + return 0; +} + +void parse_set_info(int nr_cpus, int long_sz) +{ + cpus = nr_cpus; + long_size = long_sz; +} From 5f9c39dca52d3e639ac899e169f408c6fd8396cc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Aug 2009 16:18:08 +0200 Subject: [PATCH 19/60] perf tools: Add perf trace This adds perf trace into the set of perf tools. It is written to fetch the tracepoint samples from perf events and display them, according to the events information given by the debugfs files through the util/trace* tools. It is a rough first shot and doesn't yet handle the cpu, timestamps fields and some other things. Example: perf record -f -e workqueue:workqueue_execution:record -F 1 -a perf trace kblockd/0-236 [000] 0.000000: workqueue_execution: thread=:236 func=cfq_kick_queue+0x0 kondemand/0-360 [000] 0.000000: workqueue_execution: thread=:360 func=do_dbs_timer+0x0 kondemand/0-360 [000] 0.000000: workqueue_execution: thread=:360 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 kondemand/1-361 [000] 0.000000: workqueue_execution: thread=:361 func=do_dbs_timer+0x0 Todo: - A lot of things! Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: "Luis Claudio R. Goncalves" Cc: Clark Williams Cc: Jon Masters Cc: Mathieu Desnoyers Cc: Christoph Hellwig Cc: Xiao Guangrong Cc: Zhaolei Cc: Li Zefan Cc: Lai Jiangshan Cc: Masami Hiramatsu Cc: Tom Zanussi Cc: "Frank Ch. Eigler" Cc: Roland McGrath Cc: Jason Baron Cc: Paul Mackerras Cc: Jiaying Zhang Cc: Anton Blanchard LKML-Reference: <1250518688-7207-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 4 + tools/perf/builtin-record.c | 4 + tools/perf/builtin-trace.c | 277 ++++++++++++++++++++++++++++++++++++ tools/perf/perf.c | 1 + tools/perf/util/util.h | 1 + 5 files changed, 287 insertions(+) create mode 100644 tools/perf/builtin-trace.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 5d54ddb83ab1..c481a513f551 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -347,6 +347,9 @@ LIB_OBJS += util/values.o LIB_OBJS += util/debug.o LIB_OBJS += util/map.o LIB_OBJS += util/thread.o +LIB_OBJS += util/trace-event-parse.o +LIB_OBJS += util/trace-event-read.o +LIB_OBJS += util/trace-event-info.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o @@ -355,6 +358,7 @@ BUILTIN_OBJS += builtin-record.o BUILTIN_OBJS += builtin-report.o BUILTIN_OBJS += builtin-stat.o BUILTIN_OBJS += builtin-top.o +BUILTIN_OBJS += builtin-trace.o PERFLIBS = $(LIB_FILE) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6a5db675ee4f..3ce2f03f217a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -17,6 +17,7 @@ #include "util/header.h" #include "util/event.h" #include "util/debug.h" +#include "util/trace-event.h" #include #include @@ -519,6 +520,9 @@ static int __cmd_record(int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); + if (raw_samples) + read_tracing_data(); + if (!stat(output_name, &st) && st.st_size) { if (!force && !append_file) { fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c new file mode 100644 index 000000000000..b160a9f61bf3 --- /dev/null +++ b/tools/perf/builtin-trace.c @@ -0,0 +1,277 @@ +#include "builtin.h" + +#include "util/util.h" +#include "util/cache.h" +#include "util/symbol.h" +#include "util/thread.h" +#include "util/header.h" + +#include "util/parse-options.h" + +#include "perf.h" +#include "util/debug.h" + +#include "util/trace-event.h" + +static char const *input_name = "perf.data"; +static int input; +static unsigned long page_size; +static unsigned long mmap_window = 32; + +static unsigned long total = 0; +static unsigned long total_comm = 0; + +static struct rb_root threads; +static struct thread *last_match; + +static struct perf_header *header; +static u64 sample_type; + + +static int +process_comm_event(event_t *event, unsigned long offset, unsigned long head) +{ + struct thread *thread; + + thread = threads__findnew(event->comm.pid, &threads, &last_match); + + dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->comm.comm, event->comm.pid); + + if (thread == NULL || + thread__set_comm(thread, event->comm.comm)) { + dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); + return -1; + } + total_comm++; + + return 0; +} + +static int +process_sample_event(event_t *event, unsigned long offset, unsigned long head) +{ + char level; + int show = 0; + struct dso *dso = NULL; + struct thread *thread; + u64 ip = event->ip.ip; + u64 period = 1; + void *more_data = event->ip.__more_data; + int cpumode; + + thread = threads__findnew(event->ip.pid, &threads, &last_match); + + if (sample_type & PERF_SAMPLE_PERIOD) { + period = *(u64 *)more_data; + more_data += sizeof(u64); + } + + dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->header.misc, + event->ip.pid, event->ip.tid, + (void *)(long)ip, + (long long)period); + + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + + if (thread == NULL) { + eprintf("problem processing %d event, skipping it.\n", + event->header.type); + return -1; + } + + cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK; + + if (cpumode == PERF_EVENT_MISC_KERNEL) { + show = SHOW_KERNEL; + level = 'k'; + + dso = kernel_dso; + + dump_printf(" ...... dso: %s\n", dso->name); + + } else if (cpumode == PERF_EVENT_MISC_USER) { + + show = SHOW_USER; + level = '.'; + + } else { + show = SHOW_HV; + level = 'H'; + + dso = hypervisor_dso; + + dump_printf(" ...... dso: [hypervisor]\n"); + } + + if (sample_type & PERF_SAMPLE_RAW) { + struct { + u32 size; + char data[0]; + } *raw = more_data; + + /* + * FIXME: better resolve from pid from the struct trace_entry + * field, although it should be the same than this perf + * event pid + */ + print_event(0, raw->data, raw->size, 0, thread->comm); + } + total += period; + + return 0; +} + +static int +process_event(event_t *event, unsigned long offset, unsigned long head) +{ + trace_event(event); + + switch (event->header.type) { + case PERF_EVENT_MMAP ... PERF_EVENT_LOST: + return 0; + + case PERF_EVENT_COMM: + return process_comm_event(event, offset, head); + + case PERF_EVENT_EXIT ... PERF_EVENT_READ: + return 0; + + case PERF_EVENT_SAMPLE: + return process_sample_event(event, offset, head); + + case PERF_EVENT_MAX: + default: + return -1; + } + + return 0; +} + +static int __cmd_trace(void) +{ + int ret, rc = EXIT_FAILURE; + unsigned long offset = 0; + unsigned long head = 0; + struct stat perf_stat; + event_t *event; + uint32_t size; + char *buf; + + trace_report(); + + input = open(input_name, O_RDONLY); + if (input < 0) { + perror("failed to open file"); + exit(-1); + } + + ret = fstat(input, &perf_stat); + if (ret < 0) { + perror("failed to stat file"); + exit(-1); + } + + if (!perf_stat.st_size) { + fprintf(stderr, "zero-sized file, nothing to do!\n"); + exit(0); + } + header = perf_header__read(input); + sample_type = perf_header__sample_type(header); + + if (load_kernel() < 0) { + perror("failed to load kernel symbols"); + return EXIT_FAILURE; + } + +remap: + buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, + MAP_SHARED, input, offset); + if (buf == MAP_FAILED) { + perror("failed to mmap file"); + exit(-1); + } + +more: + event = (event_t *)(buf + head); + + size = event->header.size; + if (!size) + size = 8; + + if (head + event->header.size >= page_size * mmap_window) { + unsigned long shift = page_size * (head / page_size); + int res; + + res = munmap(buf, page_size * mmap_window); + assert(res == 0); + + offset += shift; + head -= shift; + goto remap; + } + + size = event->header.size; + + + if (!size || process_event(event, offset, head) < 0) { + + /* + * assume we lost track of the stream, check alignment, and + * increment a single u64 in the hope to catch on again 'soon'. + */ + + if (unlikely(head & 7)) + head &= ~7ULL; + + size = 8; + } + + head += size; + + if (offset + head < (unsigned long)perf_stat.st_size) + goto more; + + rc = EXIT_SUCCESS; + close(input); + + return rc; +} + +static const char * const annotate_usage[] = { + "perf trace [] ", + NULL +}; + +static const struct option options[] = { + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, + "dump raw trace in ASCII"), + OPT_BOOLEAN('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), +}; + +int cmd_trace(int argc, const char **argv, const char *prefix __used) +{ + symbol__init(); + page_size = getpagesize(); + + argc = parse_options(argc, argv, options, annotate_usage, 0); + if (argc) { + /* + * Special case: if there's an argument left then assume tha + * it's a symbol filter: + */ + if (argc > 1) + usage_with_options(annotate_usage, options); + } + + + setup_pager(); + + return __cmd_trace(); +} diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 31982ad064b4..fe4589dde950 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -292,6 +292,7 @@ static void handle_internal_command(int argc, const char **argv) { "top", cmd_top, 0 }, { "annotate", cmd_annotate, 0 }, { "version", cmd_version, 0 }, + { "trace", cmd_trace, 0 }, }; unsigned int i; static const char ext[] = STRIP_EXTENSION; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index d61a6f037631..c62ef9720c6a 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -311,6 +311,7 @@ static inline int has_extension(const char *filename, const char *ext) #undef isspace #undef isdigit #undef isalpha +#undef isprint #undef isalnum #undef tolower #undef toupper From 6ede59c412579303a25c11f93d4d262e100fc7e6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Aug 2009 23:07:48 +0200 Subject: [PATCH 20/60] perf tools: Fix spelling mistake in callchain error While running perf report -g in a perf.data file that hasn't been recorded in callchain mode, the error reported has a spelling issue: ./perf report -g selected -c but no callchain data. Did you call perf record without -g? Fix it. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: <1250543271-8383-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1e3ad22d53dc..ed1fdab3a1f5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1401,7 +1401,7 @@ static int __cmd_report(void) exit(-1); } if (callchain) { - fprintf(stderr, "selected -c but no callchain data." + fprintf(stderr, "selected -g but no callchain data." " Did you call perf record without" " -g?\n"); exit(-1); From 4bf2364a951d3c043e132e4451d90c7bc74dee83 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Aug 2009 23:07:49 +0200 Subject: [PATCH 21/60] perf tools: Warn while running perf trace without sample When a user runs perf trace using an input with logged counters without PERF_SAMPLE_RAW attribute, warn by giving a nice tip. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: <1250543271-8383-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-trace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b160a9f61bf3..88eef71bce6d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -184,6 +184,10 @@ static int __cmd_trace(void) header = perf_header__read(input); sample_type = perf_header__sample_type(header); + if (!(sample_type & PERF_SAMPLE_RAW)) + die("No trace sample to read. Did you call perf record " + "without -R?"); + if (load_kernel() < 0) { perror("failed to load kernel symbols"); return EXIT_FAILURE; From 9df37ddd81f54dd41dc4958055c3a3c9b6840aef Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Aug 2009 23:07:50 +0200 Subject: [PATCH 22/60] perf tools: Record events info also when :record suffix is used. You can enable a counter's PERF_SAMPLE_RAW attribute in two fashions: - using the -R option (every counters get PERF_SAMPLE_RAW) - using the :record suffix in a trace event counter name Currently we record the events info in a trace.info file from perf record when the former method is used but we omit it with the latter. Check both situations. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: <1250543271-8383-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3ce2f03f217a..acbe59444385 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -520,9 +520,6 @@ static int __cmd_record(int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); - if (raw_samples) - read_tracing_data(); - if (!stat(output_name, &st) && st.st_size) { if (!force && !append_file) { fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", @@ -550,6 +547,17 @@ static int __cmd_record(int argc, const char **argv) else header = perf_header__new(); + + if (raw_samples) { + read_tracing_data(); + } else { + for (i = 0; i < nr_counters; i++) { + if (attrs[i].sample_type & PERF_SAMPLE_RAW) { + read_tracing_data(); + break; + } + } + } atexit(atexit_header); if (!system_wide) { From 3f9edc2382d5f7c97c693838abb207a9d6bab1fa Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Aug 2009 23:07:51 +0200 Subject: [PATCH 23/60] perf tools: Make trace event format parser aware of cast to pointers The ftrace event format parser handles the usual casts but not the cast to pointers. Such casts have been introduced recently with the module trace events and raise the following parsing error: Fatal: bad op token ) This is because it considers the "*" character as a binary operator. Make it then aware of casts to pointers. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: <1250543271-8383-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/trace-event-parse.c | 31 +++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index ead6a9ad3599..b53b27f34e4e 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1462,6 +1462,7 @@ process_paren(struct event *event, struct print_arg *arg, char **tok) { struct print_arg *item_arg; enum event_type type; + int ptr_cast = 0; char *token; type = process_arg(event, arg, &token); @@ -1469,11 +1470,26 @@ process_paren(struct event *event, struct print_arg *arg, char **tok) if (type == EVENT_ERROR) return EVENT_ERROR; - if (type == EVENT_OP) - type = process_op(event, arg, &token); + if (type == EVENT_OP) { + /* handle the ptr casts */ + if (!strcmp(token, "*")) { + /* + * FIXME: should we zapp whitespaces before ')' ? + * (may require a peek_token_item()) + */ + if (__peek_char() == ')') { + ptr_cast = 1; + free_token(token); + type = read_token_item(&token); + } + } + if (!ptr_cast) { + type = process_op(event, arg, &token); - if (type == EVENT_ERROR) - return EVENT_ERROR; + if (type == EVENT_ERROR) + return EVENT_ERROR; + } + } if (test_type_token(type, token, EVENT_DELIM, (char *)")")) { free_token(token); @@ -1499,6 +1515,13 @@ process_paren(struct event *event, struct print_arg *arg, char **tok) item_arg = malloc_or_die(sizeof(*item_arg)); arg->type = PRINT_TYPE; + if (ptr_cast) { + char *old = arg->atom.atom; + + arg->atom.atom = malloc_or_die(strlen(old + 3)); + sprintf(arg->atom.atom, "%s *", old); + free(old); + } arg->typecast.type = arg->atom.atom; arg->typecast.item = item_arg; type = process_arg_token(event, item_arg, &token, type); From 1660e9d3d04b6c636b7171bf6c08ac7b82a7de79 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 17 Aug 2009 14:36:32 +1000 Subject: [PATCH 24/60] powerpc/32: Always order writes to halves of 64-bit PTEs On 32-bit systems with 64-bit PTEs, the PTEs have to be written in two 32-bit halves. On SMP we write the higher-order half and then the lower-order half, with a write barrier between the two halves, but on UP there was no particular ordering of the writes to the two halves. This extends the ordering that we already do on SMP to the UP case as well. The reason is that with the perf_counter subsystem potentially accessing user memory at interrupt time to get stack traces, we have to be careful not to create an incorrect but apparently valid PTE even on UP. Acked-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index eb17da781128..2a5da069714e 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -104,8 +104,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, else pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte)); -#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP) - /* Second case is 32-bit with 64-bit PTE in SMP mode. In this case, we +#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) + /* Second case is 32-bit with 64-bit PTE. In this case, we * can just store as long as we do the two halves in the right order * with a barrier in between. This is possible because we take care, * in the hash code, to pre-invalidate if the PTE was already hashed, @@ -140,7 +140,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, #else /* Anything else just stores the PTE normally. That covers all 64-bit - * cases, and 32-bit non-hash with 64-bit PTEs in UP mode + * cases, and 32-bit non-hash with 32-bit PTEs. */ *ptep = pte; #endif From 9c1e105238c474d19905af504f2e7f42d4f71f9e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 17 Aug 2009 15:17:54 +1000 Subject: [PATCH 25/60] powerpc: Allow perf_counters to access user memory at interrupt time This provides a mechanism to allow the perf_counters code to access user memory in a PMU interrupt routine. Such an access can cause various kinds of interrupt: SLB miss, MMU hash table miss, segment table miss, or TLB miss, depending on the processor. This commit only deals with 64-bit classic/server processors, which use an MMU hash table. 32-bit processors are already able to access user memory at interrupt time. Since we don't soft-disable on 32-bit, we avoid the possibility of reentering hash_page or the TLB miss handlers, since they run with interrupts disabled. On 64-bit processors, an SLB miss interrupt on a user address will update the slb_cache and slb_cache_ptr fields in the paca. This is OK except in the case where a PMU interrupt occurs in switch_slb, which also accesses those fields. To prevent this, we hard-disable interrupts in switch_slb. Interrupts are already soft-disabled at this point, and will get hard-enabled when they get soft-enabled later. This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice, and to make sure that it clears the slb_cache_ptr when called from other callers than switch_slb, the existing routine is renamed to __slb_flush_and_rebolt, which is called by switch_slb and the new version of slb_flush_and_rebolt. Similarly, switch_stab (used on POWER3 and RS64 processors) gets a hard_irq_disable() to protect the per-cpu variables used there and in ste_allocate. If a MMU hashtable miss interrupt occurs, normally we would call hash_page to look up the Linux PTE for the address and create a HPTE. However, hash_page is fairly complex and takes some locks, so to avoid the possibility of deadlock, we check the preemption count to see if we are in a (pseudo-)NMI handler, and if so, we don't call hash_page but instead treat it like a bad access that will get reported up through the exception table mechanism. An interrupt whose handler runs even though the interrupt occurred when soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI handler, which should use nmi_enter()/nmi_exit() rather than irq_enter()/irq_exit(). Acked-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/asm-offsets.c | 2 ++ arch/powerpc/kernel/exceptions-64s.S | 19 ++++++++++++++ arch/powerpc/mm/slb.c | 37 +++++++++++++++++++--------- arch/powerpc/mm/stab.c | 11 ++++++++- 4 files changed, 57 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 561b64652311..197b15646eeb 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -67,6 +67,8 @@ int main(void) DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); #ifdef CONFIG_PPC64 DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); + DEFINE(SIGSEGV, SIGSEGV); + DEFINE(NMI_MASK, NMI_MASK); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eb898112e577..8ac85e08ffae 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -729,6 +729,11 @@ BEGIN_FTR_SECTION bne- do_ste_alloc /* If so handle it */ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + clrrdi r11,r1,THREAD_SHIFT + lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ + andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ + bne 77f /* then don't call hash_page now */ + /* * On iSeries, we soft-disable interrupts here, then * hard-enable interrupts so that the hash_page code can spin on @@ -833,6 +838,20 @@ handle_page_fault: bl .low_hash_fault b .ret_from_except +/* + * We come here as a result of a DSI at a point where we don't want + * to call hash_page, such as when we are accessing memory (possibly + * user memory) inside a PMU interrupt that occurred while interrupts + * were soft-disabled. We want to invoke the exception handler for + * the access, or panic if there isn't a handler. + */ +77: bl .save_nvgprs + mr r4,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + li r5,SIGSEGV + bl .bad_page_fault + b .ret_from_except + /* here we have a segment miss */ do_ste_alloc: bl .ste_allocate /* try to insert stab entry */ diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 5b7038f248b6..a685652effeb 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -92,15 +92,13 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, : "memory" ); } -void slb_flush_and_rebolt(void) +static void __slb_flush_and_rebolt(void) { /* If you change this make sure you change SLB_NUM_BOLTED * appropriately too. */ unsigned long linear_llp, vmalloc_llp, lflags, vflags; unsigned long ksp_esid_data, ksp_vsid_data; - WARN_ON(!irqs_disabled()); - linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; lflags = SLB_VSID_KERNEL | linear_llp; @@ -117,12 +115,6 @@ void slb_flush_and_rebolt(void) ksp_vsid_data = get_slb_shadow()->save_area[2].vsid; } - /* - * We can't take a PMU exception in the following code, so hard - * disable interrupts. - */ - hard_irq_disable(); - /* We need to do this all in asm, so we're sure we don't touch * the stack between the slbia and rebolting it. */ asm volatile("isync\n" @@ -139,6 +131,21 @@ void slb_flush_and_rebolt(void) : "memory"); } +void slb_flush_and_rebolt(void) +{ + + WARN_ON(!irqs_disabled()); + + /* + * We can't take a PMU exception in the following code, so hard + * disable interrupts. + */ + hard_irq_disable(); + + __slb_flush_and_rebolt(); + get_paca()->slb_cache_ptr = 0; +} + void slb_vmalloc_update(void) { unsigned long vflags; @@ -180,12 +187,20 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2) /* Flush all user entries from the segment table of the current processor. */ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) { - unsigned long offset = get_paca()->slb_cache_ptr; + unsigned long offset; unsigned long slbie_data = 0; unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; + /* + * We need interrupts hard-disabled here, not just soft-disabled, + * so that a PMU interrupt can't occur, which might try to access + * user memory (to get a stack trace) and possible cause an SLB miss + * which would update the slb_cache/slb_cache_ptr fields in the PACA. + */ + hard_irq_disable(); + offset = get_paca()->slb_cache_ptr; if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { int i; @@ -200,7 +215,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) } asm volatile("isync" : : : "memory"); } else { - slb_flush_and_rebolt(); + __slb_flush_and_rebolt(); } /* Workaround POWER5 < DD2.1 issue */ diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 98cd1dc2ae75..ab5fb48b3e90 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -164,7 +164,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) { struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; struct stab_entry *ste; - unsigned long offset = __get_cpu_var(stab_cache_ptr); + unsigned long offset; unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; @@ -172,6 +172,15 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) /* Force previous translations to complete. DRENG */ asm volatile("isync" : : : "memory"); + /* + * We need interrupts hard-disabled here, not just soft-disabled, + * so that a PMU interrupt can't occur, which might try to access + * user memory (to get a stack trace) and possible cause an STAB miss + * which would update the stab_cache/stab_cache_ptr per-cpu variables. + */ + hard_irq_disable(); + + offset = __get_cpu_var(stab_cache_ptr); if (offset <= NR_STAB_CACHE_ENTRIES) { int i; From 20002ded4d937ca87aca6253b874920a96a763c4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 18 Aug 2009 08:25:32 +1000 Subject: [PATCH 26/60] perf_counter: powerpc: Add callchain support This adds support for tracing callchains for powerpc, both 32-bit and 64-bit, and both in the kernel and userspace, from PMU interrupt context. The first three entries stored for each callchain are the NIP (next instruction pointer), LR (link register), and the contents of the LR save area in the second stack frame (the first is ignored because the ABI convention on powerpc is that functions save their return address in their caller's stack frame). Because leaf functions don't have to save their return address (LR value) and don't have to establish a stack frame, it's possible for either or both of LR and the second stack frame's LR save area to have valid return addresses in them. This is basically impossible to disambiguate without either reading the code or looking at auxiliary information such as CFI tables. Since we don't want to do either of those things at interrupt time, we store both LR and the second stack frame's LR save area. Once we get past the second stack frame, there is no ambiguity; all return addresses we get are reliable. For kernel traces, we check whether they are valid kernel instruction addresses and store zero instead if they are not (rather than omitting them, which would make it impossible for userspace to know which was which). We also store zero instead of the second stack frame's LR save area value if it is the same as LR. For kernel traces, we check for interrupt frames, and for user traces, we check for signal frames. In each case, since we're starting a new trace, we store a PERF_CONTEXT_KERNEL/USER marker so that userspace knows that the next three entries are NIP, LR and the second stack frame for the interrupted context. We read user memory with __get_user_inatomic. On 64-bit, if this PMU interrupt occurred while interrupts are soft-disabled, and there is no MMU hash table entry for the page, we will get an -EFAULT return from __get_user_inatomic even if there is a valid Linux PTE for the page, since hash_page isn't reentrant. Thus we have code here to read the Linux PTE and access the page via the kernel linear mapping. Since 64-bit doesn't use (or need) highmem there is no need to do kmap_atomic. On 32-bit, we don't do soft interrupt disabling, so this complication doesn't occur and there is no need to fall back to reading the Linux PTE, since hash_page (or the TLB miss handler) will get called automatically if necessary. Note that we cannot get PMU interrupts in the interval during context switch between switch_mm (which switches the user address space) and switch_to (which actually changes current to the new process). On 64-bit this is because interrupts are hard-disabled in switch_mm and stay hard-disabled until they are soft-enabled later, after switch_to has returned. So there is no possibility of trying to do a user stack trace when the user address space is not current's address space. Acked-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/perf_callchain.c | 527 +++++++++++++++++++++++++++ 2 files changed, 528 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/perf_callchain.c diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b73396b93905..9619285f64e8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -97,7 +97,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o perf_callchain.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c new file mode 100644 index 000000000000..f74b62c67511 --- /dev/null +++ b/arch/powerpc/kernel/perf_callchain.c @@ -0,0 +1,527 @@ +/* + * Performance counter callchain support - powerpc architecture code + * + * Copyright © 2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PPC64 +#include "ppc32.h" +#endif + +/* + * Store another value in a callchain_entry. + */ +static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + unsigned int nr = entry->nr; + + if (nr < PERF_MAX_STACK_DEPTH) { + entry->ip[nr] = ip; + entry->nr = nr + 1; + } +} + +/* + * Is sp valid as the address of the next kernel stack frame after prev_sp? + * The next frame may be in a different stack area but should not go + * back down in the same stack area. + */ +static int valid_next_sp(unsigned long sp, unsigned long prev_sp) +{ + if (sp & 0xf) + return 0; /* must be 16-byte aligned */ + if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + return 0; + if (sp >= prev_sp + STACK_FRAME_OVERHEAD) + return 1; + /* + * sp could decrease when we jump off an interrupt stack + * back to the regular process stack. + */ + if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1))) + return 1; + return 0; +} + +static void perf_callchain_kernel(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned long sp, next_sp; + unsigned long next_ip; + unsigned long lr; + long level = 0; + unsigned long *fp; + + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_KERNEL); + callchain_store(entry, regs->nip); + + if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + return; + + for (;;) { + fp = (unsigned long *) sp; + next_sp = fp[0]; + + if (next_sp == sp + STACK_INT_FRAME_SIZE && + fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + /* + * This looks like an interrupt frame for an + * interrupt that occurred in the kernel + */ + regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); + next_ip = regs->nip; + lr = regs->link; + level = 0; + callchain_store(entry, PERF_CONTEXT_KERNEL); + + } else { + if (level == 0) + next_ip = lr; + else + next_ip = fp[STACK_FRAME_LR_SAVE]; + + /* + * We can't tell which of the first two addresses + * we get are valid, but we can filter out the + * obviously bogus ones here. We replace them + * with 0 rather than removing them entirely so + * that userspace can tell which is which. + */ + if ((level == 1 && next_ip == lr) || + (level <= 1 && !kernel_text_address(next_ip))) + next_ip = 0; + + ++level; + } + + callchain_store(entry, next_ip); + if (!valid_next_sp(next_sp, sp)) + return; + sp = next_sp; + } +} + +#ifdef CONFIG_PPC64 + +#ifdef CONFIG_HUGETLB_PAGE +#define is_huge_psize(pagesize) (HPAGE_SHIFT && mmu_huge_psizes[pagesize]) +#else +#define is_huge_psize(pagesize) 0 +#endif + +/* + * On 64-bit we don't want to invoke hash_page on user addresses from + * interrupt context, so if the access faults, we read the page tables + * to find which page (if any) is mapped and access it directly. + */ +static int read_user_stack_slow(void __user *ptr, void *ret, int nb) +{ + pgd_t *pgdir; + pte_t *ptep, pte; + int pagesize; + unsigned long addr = (unsigned long) ptr; + unsigned long offset; + unsigned long pfn; + void *kaddr; + + pgdir = current->mm->pgd; + if (!pgdir) + return -EFAULT; + + pagesize = get_slice_psize(current->mm, addr); + + /* align address to page boundary */ + offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1); + addr -= offset; + + if (is_huge_psize(pagesize)) + ptep = huge_pte_offset(current->mm, addr); + else + ptep = find_linux_pte(pgdir, addr); + + if (ptep == NULL) + return -EFAULT; + pte = *ptep; + if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER)) + return -EFAULT; + pfn = pte_pfn(pte); + if (!page_is_ram(pfn)) + return -EFAULT; + + /* no highmem to worry about here */ + kaddr = pfn_to_kaddr(pfn); + memcpy(ret, kaddr + offset, nb); + return 0; +} + +static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || + ((unsigned long)ptr & 7)) + return -EFAULT; + + if (!__get_user_inatomic(*ret, ptr)) + return 0; + + return read_user_stack_slow(ptr, ret, 8); +} + +static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || + ((unsigned long)ptr & 3)) + return -EFAULT; + + if (!__get_user_inatomic(*ret, ptr)) + return 0; + + return read_user_stack_slow(ptr, ret, 4); +} + +static inline int valid_user_sp(unsigned long sp, int is_64) +{ + if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32) + return 0; + return 1; +} + +/* + * 64-bit user processes use the same stack frame for RT and non-RT signals. + */ +struct signal_frame_64 { + char dummy[__SIGNAL_FRAMESIZE]; + struct ucontext uc; + unsigned long unused[2]; + unsigned int tramp[6]; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + char abigap[288]; +}; + +static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) +{ + if (nip == fp + offsetof(struct signal_frame_64, tramp)) + return 1; + if (vdso64_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) + return 1; + return 0; +} + +/* + * Do some sanity checking on the signal frame pointed to by sp. + * We check the pinfo and puc pointers in the frame. + */ +static int sane_signal_64_frame(unsigned long sp) +{ + struct signal_frame_64 __user *sf; + unsigned long pinfo, puc; + + sf = (struct signal_frame_64 __user *) sp; + if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) || + read_user_stack_64((unsigned long __user *) &sf->puc, &puc)) + return 0; + return pinfo == (unsigned long) &sf->info && + puc == (unsigned long) &sf->uc; +} + +static void perf_callchain_user_64(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned long sp, next_sp; + unsigned long next_ip; + unsigned long lr; + long level = 0; + struct signal_frame_64 __user *sigframe; + unsigned long __user *fp, *uregs; + + next_ip = regs->nip; + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + + for (;;) { + fp = (unsigned long __user *) sp; + if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) + return; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, which can happen when + * transitioning from an alternate signal stack to the + * normal stack. + */ + if (next_sp - sp >= sizeof(struct signal_frame_64) && + (is_sigreturn_64_address(next_ip, sp) || + (level <= 1 && is_sigreturn_64_address(lr, sp))) && + sane_signal_64_frame(sp)) { + /* + * This looks like an signal frame + */ + sigframe = (struct signal_frame_64 __user *) sp; + uregs = sigframe->uc.uc_mcontext.gp_regs; + if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || + read_user_stack_64(&uregs[PT_LNK], &lr) || + read_user_stack_64(&uregs[PT_R1], &sp)) + return; + level = 0; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} + +static inline int current_is_64bit(void) +{ + /* + * We can't use test_thread_flag() here because we may be on an + * interrupt stack, and the thread flags don't get copied over + * from the thread_info on the main stack to the interrupt stack. + */ + return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT); +} + +#else /* CONFIG_PPC64 */ +/* + * On 32-bit we just access the address and let hash_page create a + * HPTE if necessary, so there is no need to fall back to reading + * the page tables. Since this is called at interrupt level, + * do_page_fault() won't treat a DSI as a page fault. + */ +static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || + ((unsigned long)ptr & 3)) + return -EFAULT; + + return __get_user_inatomic(*ret, ptr); +} + +static inline void perf_callchain_user_64(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ +} + +static inline int current_is_64bit(void) +{ + return 0; +} + +static inline int valid_user_sp(unsigned long sp, int is_64) +{ + if (!sp || (sp & 7) || sp > TASK_SIZE - 32) + return 0; + return 1; +} + +#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE +#define sigcontext32 sigcontext +#define mcontext32 mcontext +#define ucontext32 ucontext +#define compat_siginfo_t struct siginfo + +#endif /* CONFIG_PPC64 */ + +/* + * Layout for non-RT signal frames + */ +struct signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32]; + struct sigcontext32 sctx; + struct mcontext32 mctx; + int abigap[56]; +}; + +/* + * Layout for RT signal frames + */ +struct rt_signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32 + 16]; + compat_siginfo_t info; + struct ucontext32 uc; + int abigap[56]; +}; + +static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) + return 1; + if (vdso32_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_sigtramp) + return 1; + return 0; +} + +static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct rt_signal_frame_32, + uc.uc_mcontext.mc_pad)) + return 1; + if (vdso32_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) + return 1; + return 0; +} + +static int sane_signal_32_frame(unsigned int sp) +{ + struct signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, ®s)) + return 0; + return regs == (unsigned long) &sf->mctx; +} + +static int sane_rt_signal_32_frame(unsigned int sp) +{ + struct rt_signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, ®s)) + return 0; + return regs == (unsigned long) &sf->uc.uc_mcontext; +} + +static unsigned int __user *signal_frame_32_regs(unsigned int sp, + unsigned int next_sp, unsigned int next_ip) +{ + struct mcontext32 __user *mctx = NULL; + struct signal_frame_32 __user *sf; + struct rt_signal_frame_32 __user *rt_sf; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, for example, when + * transitioning from an alternate signal stack to the + * normal stack. + */ + if (next_sp - sp >= sizeof(struct signal_frame_32) && + is_sigreturn_32_address(next_ip, sp) && + sane_signal_32_frame(sp)) { + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + mctx = &sf->mctx; + } + + if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) && + is_rt_sigreturn_32_address(next_ip, sp) && + sane_rt_signal_32_frame(sp)) { + rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + mctx = &rt_sf->uc.uc_mcontext; + } + + if (!mctx) + return NULL; + return mctx->mc_gregs; +} + +static void perf_callchain_user_32(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned int sp, next_sp; + unsigned int next_ip; + unsigned int lr; + long level = 0; + unsigned int __user *fp, *uregs; + + next_ip = regs->nip; + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + + while (entry->nr < PERF_MAX_STACK_DEPTH) { + fp = (unsigned int __user *) (unsigned long) sp; + if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) + return; + + uregs = signal_frame_32_regs(sp, next_sp, next_ip); + if (!uregs && level <= 1) + uregs = signal_frame_32_regs(sp, next_sp, lr); + if (uregs) { + /* + * This looks like an signal frame, so restart + * the stack trace with the values in it. + */ + if (read_user_stack_32(&uregs[PT_NIP], &next_ip) || + read_user_stack_32(&uregs[PT_LNK], &lr) || + read_user_stack_32(&uregs[PT_R1], &sp)) + return; + level = 0; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} + +/* + * Since we can't get PMU interrupts inside a PMU interrupt handler, + * we don't need separate irq and nmi entries here. + */ +static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); + +struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry = &__get_cpu_var(callchain); + + entry->nr = 0; + + if (current->pid == 0) /* idle task? */ + return entry; + + if (!user_mode(regs)) { + perf_callchain_kernel(regs, entry); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + if (current_is_64bit()) + perf_callchain_user_64(regs, entry); + else + perf_callchain_user_32(regs, entry); + } + + return entry; +} From 1f18345bdfd489fde1085bc85839d5d3645cf511 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 18 Aug 2009 10:59:47 +0200 Subject: [PATCH 27/60] perf tools: Remove obsolete defines The _XOPEN_SOURCE* defines are not really needed on Linux and it's not like we'll port this to AIX ;-) The define also broke the build with gcc 4.4.1: CC util/trace-event-parse.o In file included from util/trace-event-parse.c:32: util/util.h:43:1: error: "_XOPEN_SOURCE" redefined So remove them. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/util.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index d61a6f037631..15004d211663 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -39,10 +39,6 @@ /* Approximation of the length of the decimal representation of this type. */ #define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) -#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && !defined(_M_UNIX) -#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */ -#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */ -#endif #define _ALL_SOURCE 1 #define _GNU_SOURCE 1 #define _BSD_SOURCE 1 From 4273b005875c34beda4a11c9d4a9132d80378036 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 18 Aug 2009 16:03:46 +0200 Subject: [PATCH 28/60] perf tools: Fix comm column adjusting The librarization of the thread helpers between annotate and report lost some perf report specifics. This patch fixes the thread comm column adjusting that has been omitted during this export. Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1250604226-6852-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1e3ad22d53dc..3fc0d471b90b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -665,6 +665,27 @@ static void dso__calc_col_width(struct dso *self) self->slen_calculated = 1; } +static int thread__set_comm_adjust(struct thread *self, const char *comm) +{ + int ret = thread__set_comm(self, comm); + + if (ret) + return ret; + + if (!col_width_list_str && !field_sep && + (!comm_list || strlist__has_entry(comm_list, comm))) { + unsigned int slen = strlen(comm); + + if (slen > comms__col_width) { + comms__col_width = slen; + threads__col_width = slen + 6; + } + } + + return 0; +} + + static struct symbol * resolve_symbol(struct thread *thread, struct map **mapp, struct dso **dsop, u64 *ipp) @@ -1056,7 +1077,7 @@ static void register_idle_thread(void) struct thread *thread = threads__findnew(0, &threads, &last_match); if (thread == NULL || - thread__set_comm(thread, "[idle]")) { + thread__set_comm_adjust(thread, "[idle]")) { fprintf(stderr, "problem inserting idle task.\n"); exit(-1); } @@ -1226,7 +1247,7 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) event->comm.comm, event->comm.pid); if (thread == NULL || - thread__set_comm(thread, event->comm.comm)) { + thread__set_comm_adjust(thread, event->comm.comm)) { dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); return -1; } From 6e086437f35ad9fda448711732c4ce0f82aad569 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 18 Aug 2009 17:04:03 +0200 Subject: [PATCH 29/60] perf tools: Save partial non-overlapping map The librarization of the thread helpers between annotate and report lost some perf report specifics. thread__insert_map() had its most uptodate version in perf report which cared about partial map overlapping. In case of overlap between two maps, perf annotate's version removes the whole old map without considering if it partially or absolutely overlaps the new map. We exported the odd version, change it by using the perf report version. Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1250607843-7395-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 00c14b98d651..f98032c135c6 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -4,6 +4,7 @@ #include #include "thread.h" #include "util.h" +#include "debug.h" static struct thread *thread__new(pid_t pid) { @@ -85,9 +86,27 @@ void thread__insert_map(struct thread *self, struct map *map) list_for_each_entry_safe(pos, tmp, &self->maps, node) { if (map__overlap(pos, map)) { - list_del_init(&pos->node); - /* XXX leaks dsos */ - free(pos); + if (verbose >= 2) { + printf("overlapping maps:\n"); + map__fprintf(map, stdout); + map__fprintf(pos, stdout); + } + + if (map->start <= pos->start && map->end > pos->start) + pos->start = map->end; + + if (map->end >= pos->end && map->start < pos->end) + pos->end = map->start; + + if (verbose >= 2) { + printf("after collision:\n"); + map__fprintf(pos, stdout); + } + + if (pos->start >= pos->end) { + list_del_init(&pos->node); + free(pos); + } } } From 1909629fb1ec9800cf2cb0091870d6a1c1ca665f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Aug 2009 14:56:03 -0400 Subject: [PATCH 30/60] perf trace: Add OPT_END to option array of perf-trace Add OPT_END to option array of perf-trace for fixing a SEGV bug when showing perf-trace help message. Without this patch; ./perf trace -h usage: perf trace [] -D, --dump-raw-trace dump raw trace in ASCII -v, --verbose be more verbose (show symbol address, etc) -f, Segmentation fault With this patch: ./perf trace -h usage: perf trace [] -D, --dump-raw-trace dump raw trace in ASCII -v, --verbose be more verbose (show symbol address, etc) Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Masami Hiramatsu Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Xiao Guangrong Cc: Zhaolei Cc: Li Zefan Cc: Lai Jiangshan Cc: Tom Zanussi LKML-Reference: <20090821185603.11039.62109.stgit@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/builtin-trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 88eef71bce6d..dd3c2e7c9a18 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -257,6 +257,7 @@ static const struct option options[] = { "dump raw trace in ASCII"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), + OPT_END() }; int cmd_trace(int argc, const char **argv, const char *prefix __used) From fa289beca9de9119c7760bd984f3640da21bc94c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Aug 2009 15:17:20 +1000 Subject: [PATCH 31/60] perf_counter: Start counting time enabled when group leader gets enabled Currently, if a group is created where the group leader is initially disabled but a non-leader member is initially enabled, and then the leader is subsequently enabled some time later, the time_enabled for the non-leader member will reflect the whole time since it was created, not just the time since the leader was enabled. This is incorrect, because all of the members are effectively disabled while the leader is disabled, since none of the members can go on the PMU if the leader can't. Thus we have to update the ->tstamp_enabled for all the enabled group members when a group leader is enabled, so that the time_enabled computation only counts the time since the leader was enabled. Similarly, when disabling a group leader we have to update the time_enabled and time_running for all of the group members. Also, in update_counter_times, we have to treat a counter whose group leader is disabled as being disabled. Reported-by: Stephane Eranian Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: LKML-Reference: <19091.29664.342227.445006@drongo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f274e1959885..06bf6a4f2608 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -469,7 +469,8 @@ static void update_counter_times(struct perf_counter *counter) struct perf_counter_context *ctx = counter->ctx; u64 run_end; - if (counter->state < PERF_COUNTER_STATE_INACTIVE) + if (counter->state < PERF_COUNTER_STATE_INACTIVE || + counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE) return; counter->total_time_enabled = ctx->time - counter->tstamp_enabled; @@ -518,7 +519,7 @@ static void __perf_counter_disable(void *info) */ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { update_context_time(ctx); - update_counter_times(counter); + update_group_times(counter); if (counter == counter->group_leader) group_sched_out(counter, cpuctx, ctx); else @@ -573,7 +574,7 @@ static void perf_counter_disable(struct perf_counter *counter) * in, so we can change the state safely. */ if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_counter_times(counter); + update_group_times(counter); counter->state = PERF_COUNTER_STATE_OFF; } @@ -850,6 +851,27 @@ perf_install_in_context(struct perf_counter_context *ctx, spin_unlock_irq(&ctx->lock); } +/* + * Put a counter into inactive state and update time fields. + * Enabling the leader of a group effectively enables all + * the group members that aren't explicitly disabled, so we + * have to update their ->tstamp_enabled also. + * Note: this works for group members as well as group leaders + * since the non-leader members' sibling_lists will be empty. + */ +static void __perf_counter_mark_enabled(struct perf_counter *counter, + struct perf_counter_context *ctx) +{ + struct perf_counter *sub; + + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = ctx->time - counter->total_time_enabled; + list_for_each_entry(sub, &counter->sibling_list, list_entry) + if (sub->state >= PERF_COUNTER_STATE_INACTIVE) + sub->tstamp_enabled = + ctx->time - sub->total_time_enabled; +} + /* * Cross CPU call to enable a performance counter */ @@ -877,8 +899,7 @@ static void __perf_counter_enable(void *info) if (counter->state >= PERF_COUNTER_STATE_INACTIVE) goto unlock; - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = ctx->time - counter->total_time_enabled; + __perf_counter_mark_enabled(counter, ctx); /* * If the counter is in a group and isn't the group leader, @@ -971,11 +992,9 @@ static void perf_counter_enable(struct perf_counter *counter) * Since we have the lock this context can't be scheduled * in, so we can change the state safely. */ - if (counter->state == PERF_COUNTER_STATE_OFF) { - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = - ctx->time - counter->total_time_enabled; - } + if (counter->state == PERF_COUNTER_STATE_OFF) + __perf_counter_mark_enabled(counter, ctx); + out: spin_unlock_irq(&ctx->lock); } @@ -1479,9 +1498,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task) counter->attr.enable_on_exec = 0; if (counter->state >= PERF_COUNTER_STATE_INACTIVE) continue; - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = - ctx->time - counter->total_time_enabled; + __perf_counter_mark_enabled(counter, ctx); enabled = 1; } From a4be7c2778d1fd8e0a8a5607bec91fa221ab2c91 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Aug 2009 11:18:27 +0200 Subject: [PATCH 32/60] perf_counter: Allow sharing of output channels Provide the ability to configure a counter to send its output to another (already existing) counter's output stream. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: stephane eranian Cc: Paul Mackerras LKML-Reference: <20090819092023.980284148@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 +++ kernel/perf_counter.c | 83 ++++++++++++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index b53f7006cc4e..e022b847c90d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -216,6 +216,7 @@ struct perf_counter_attr { #define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) #define PERF_COUNTER_IOC_RESET _IO ('$', 3) #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) enum perf_counter_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -415,6 +416,9 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -536,6 +540,7 @@ struct perf_counter { struct list_head sibling_list; int nr_siblings; struct perf_counter *group_leader; + struct perf_counter *output; const struct pmu *pmu; enum perf_counter_active_state state; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 06bf6a4f2608..53abcbefa0bf 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1692,6 +1692,11 @@ static void free_counter(struct perf_counter *counter) atomic_dec(&nr_task_counters); } + if (counter->output) { + fput(counter->output->filp); + counter->output = NULL; + } + if (counter->destroy) counter->destroy(counter); @@ -1977,6 +1982,8 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) return ret; } +int perf_counter_set_output(struct perf_counter *counter, int output_fd); + static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct perf_counter *counter = file->private_data; @@ -2000,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_COUNTER_IOC_PERIOD: return perf_counter_period(counter, (u64 __user *)arg); + case PERF_COUNTER_IOC_SET_OUTPUT: + return perf_counter_set_output(counter, arg); + default: return -ENOTTY; } @@ -2270,6 +2280,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) WARN_ON_ONCE(counter->ctx->parent_ctx); mutex_lock(&counter->mmap_mutex); + if (counter->output) { + ret = -EINVAL; + goto unlock; + } + if (atomic_inc_not_zero(&counter->mmap_count)) { if (nr_pages != counter->data->nr_pages) ret = -EINVAL; @@ -2655,6 +2670,7 @@ static int perf_output_begin(struct perf_output_handle *handle, struct perf_counter *counter, unsigned int size, int nmi, int sample) { + struct perf_counter *output_counter; struct perf_mmap_data *data; unsigned int offset, head; int have_lost; @@ -2664,13 +2680,17 @@ static int perf_output_begin(struct perf_output_handle *handle, u64 lost; } lost_event; + rcu_read_lock(); /* * For inherited counters we send all the output towards the parent. */ if (counter->parent) counter = counter->parent; - rcu_read_lock(); + output_counter = rcu_dereference(counter->output); + if (output_counter) + counter = output_counter; + data = rcu_dereference(counter->data); if (!data) goto out; @@ -4218,6 +4238,57 @@ static int perf_copy_attr(struct perf_counter_attr __user *uattr, goto out; } +int perf_counter_set_output(struct perf_counter *counter, int output_fd) +{ + struct perf_counter *output_counter = NULL; + struct file *output_file = NULL; + struct perf_counter *old_output; + int fput_needed = 0; + int ret = -EINVAL; + + if (!output_fd) + goto set; + + output_file = fget_light(output_fd, &fput_needed); + if (!output_file) + return -EBADF; + + if (output_file->f_op != &perf_fops) + goto out; + + output_counter = output_file->private_data; + + /* Don't chain output fds */ + if (output_counter->output) + goto out; + + /* Don't set an output fd when we already have an output channel */ + if (counter->data) + goto out; + + atomic_long_inc(&output_file->f_count); + +set: + mutex_lock(&counter->mmap_mutex); + old_output = counter->output; + rcu_assign_pointer(counter->output, output_counter); + mutex_unlock(&counter->mmap_mutex); + + if (old_output) { + /* + * we need to make sure no existing perf_output_*() + * is still referencing this counter. + */ + synchronize_rcu(); + fput(old_output->filp); + } + + ret = 0; +out: + fput_light(output_file, fput_needed); + return ret; +} + /** * sys_perf_counter_open - open a performance counter, associate it to a task/cpu * @@ -4240,7 +4311,7 @@ SYSCALL_DEFINE5(perf_counter_open, int ret; /* for future expandability... */ - if (flags) + if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) return -EINVAL; ret = perf_copy_attr(attr_uptr, &attr); @@ -4268,7 +4339,7 @@ SYSCALL_DEFINE5(perf_counter_open, * Look up the group leader (we will attach this counter to it): */ group_leader = NULL; - if (group_fd != -1) { + if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { ret = -EINVAL; group_file = fget_light(group_fd, &fput_needed); if (!group_file) @@ -4310,6 +4381,12 @@ SYSCALL_DEFINE5(perf_counter_open, if (!counter_file) goto err_free_put_context; + if (flags & PERF_FLAG_FD_OUTPUT) { + ret = perf_counter_set_output(counter, group_fd); + if (ret) + goto err_free_put_context; + } + counter->filp = counter_file; WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); From 7ced156bb8bb6363b6ed541871bf19a90273f910 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Aug 2009 11:51:26 -0300 Subject: [PATCH 33/60] perf top: Show RIP only in verbose mode Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20090826145126.GA5255@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 62b55ecab2c6..4002ccb36750 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -483,11 +483,16 @@ static void print_sym_table(void) if (nr_counters == 1) printf(" samples pcnt"); else - printf(" weight samples pcnt"); + printf(" weight samples pcnt"); - printf(" RIP kernel function\n" - " ______ _______ _____ ________________ _______________\n\n" - ); + if (verbose) + printf(" RIP "); + printf(" kernel function\n"); + printf(" %s _______ _____", + nr_counters == 1 ? " " : "______"); + if (verbose) + printf(" ________________"); + printf(" _______________\n\n"); for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { struct symbol *sym; @@ -508,7 +513,9 @@ static void print_sym_table(void) printf("%9.1f %10ld - ", syme->weight, syme->snap_count); percent_color_fprintf(stdout, "%4.1f%%", pcnt); - printf(" - %016llx : %s", sym->start, sym->name); + if (verbose) + printf(" - %016llx", sym->start); + printf(" : %s", sym->name); if (sym->module) printf("\t[%s]", sym->module->name); printf("\n"); From 1ef2ed1066ae9f8080cd96cba11c2d41118b8792 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 28 Aug 2009 03:09:58 +0200 Subject: [PATCH 34/60] perf tools: Only save the event formats we need While opening a trace event counter, every events are saved in the trace.info file. But we only want to save the specifications of the events we are using. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt LKML-Reference: <1251421798-9101-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 4 +- tools/perf/util/parse-events.c | 47 +++++++++++++++---- tools/perf/util/parse-events.h | 13 +++++- tools/perf/util/trace-event-info.c | 72 ++++++++++++++++++++++++----- tools/perf/util/trace-event-parse.c | 1 + tools/perf/util/trace-event-read.c | 1 + tools/perf/util/trace-event.h | 9 ++-- 7 files changed, 120 insertions(+), 27 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index acbe59444385..add514d53d2e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -549,11 +549,11 @@ static int __cmd_record(int argc, const char **argv) if (raw_samples) { - read_tracing_data(); + read_tracing_data(attrs, nr_counters); } else { for (i = 0; i < nr_counters; i++) { if (attrs[i].sample_type & PERF_SAMPLE_RAW) { - read_tracing_data(); + read_tracing_data(attrs, nr_counters); break; } } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 1cda97b39118..89d46c99bc9c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -158,9 +158,9 @@ int valid_debugfs_mount(const char *debugfs) return 0; } -static const char *tracepoint_id_to_name(u64 config) +struct tracepoint_path *tracepoint_id_to_path(u64 config) { - static char tracepoint_name[2 * MAX_EVENT_LENGTH]; + struct tracepoint_path *path = NULL; DIR *sys_dir, *evt_dir; struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; struct stat st; @@ -170,7 +170,7 @@ static const char *tracepoint_id_to_name(u64 config) char evt_path[MAXPATHLEN]; if (valid_debugfs_mount(debugfs_path)) - return "unkown"; + return NULL; sys_dir = opendir(debugfs_path); if (!sys_dir) @@ -197,10 +197,23 @@ static const char *tracepoint_id_to_name(u64 config) if (id == config) { closedir(evt_dir); closedir(sys_dir); - snprintf(tracepoint_name, 2 * MAX_EVENT_LENGTH, - "%s:%s", sys_dirent.d_name, - evt_dirent.d_name); - return tracepoint_name; + path = calloc(1, sizeof(path)); + path->system = malloc(MAX_EVENT_LENGTH); + if (!path->system) { + free(path); + return NULL; + } + path->name = malloc(MAX_EVENT_LENGTH); + if (!path->name) { + free(path->system); + free(path); + return NULL; + } + strncpy(path->system, sys_dirent.d_name, + MAX_EVENT_LENGTH); + strncpy(path->name, evt_dirent.d_name, + MAX_EVENT_LENGTH); + return path; } } closedir(evt_dir); @@ -208,7 +221,25 @@ static const char *tracepoint_id_to_name(u64 config) cleanup: closedir(sys_dir); - return "unkown"; + return NULL; +} + +#define TP_PATH_LEN (MAX_EVENT_LENGTH * 2 + 1) +static const char *tracepoint_id_to_name(u64 config) +{ + static char buf[TP_PATH_LEN]; + struct tracepoint_path *path; + + path = tracepoint_id_to_path(config); + if (path) { + snprintf(buf, TP_PATH_LEN, "%s:%s", path->system, path->name); + free(path->name); + free(path->system); + free(path); + } else + snprintf(buf, TP_PATH_LEN, "%s:%s", "unknown", "unknown"); + + return buf; } static int is_cache_op_valid(u8 cache_type, u8 cache_op) diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 9b1aeea01636..60704c15961f 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -1,10 +1,19 @@ - +#ifndef _PARSE_EVENTS_H +#define _PARSE_EVENTS_H /* * Parse symbolic events/counts passed in as options: */ struct option; +struct tracepoint_path { + char *system; + char *name; + struct tracepoint_path *next; +}; + +extern struct tracepoint_path *tracepoint_id_to_path(u64 config); + extern int nr_counters; extern struct perf_counter_attr attrs[MAX_COUNTERS]; @@ -21,3 +30,5 @@ extern void print_events(void); extern char debugfs_path[]; extern int valid_debugfs_mount(const char *debugfs); + +#endif /* _PARSE_EVENTS_H */ diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 78adff189bba..81615279b876 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -32,7 +32,9 @@ #include #include #include +#include +#include "../perf.h" #include "trace-event.h" @@ -289,7 +291,18 @@ static void read_header_files(void) put_tracing_file(path); } -static void copy_event_system(const char *sys) +static bool name_in_tp_list(char *sys, struct tracepoint_path *tps) +{ + while (tps) { + if (!strcmp(sys, tps->name)) + return true; + tps = tps->next; + } + + return false; +} + +static void copy_event_system(const char *sys, struct tracepoint_path *tps) { unsigned long long size, check_size; struct dirent *dent; @@ -305,7 +318,8 @@ static void copy_event_system(const char *sys) while ((dent = readdir(dir))) { if (strcmp(dent->d_name, ".") == 0 || - strcmp(dent->d_name, "..") == 0) + strcmp(dent->d_name, "..") == 0 || + !name_in_tp_list(dent->d_name, tps)) continue; format = malloc_or_die(strlen(sys) + strlen(dent->d_name) + 10); sprintf(format, "%s/%s/format", sys, dent->d_name); @@ -321,7 +335,8 @@ static void copy_event_system(const char *sys) rewinddir(dir); while ((dent = readdir(dir))) { if (strcmp(dent->d_name, ".") == 0 || - strcmp(dent->d_name, "..") == 0) + strcmp(dent->d_name, "..") == 0 || + !name_in_tp_list(dent->d_name, tps)) continue; format = malloc_or_die(strlen(sys) + strlen(dent->d_name) + 10); sprintf(format, "%s/%s/format", sys, dent->d_name); @@ -340,18 +355,29 @@ static void copy_event_system(const char *sys) } } -static void read_ftrace_files(void) +static void read_ftrace_files(struct tracepoint_path *tps) { char *path; path = get_tracing_file("events/ftrace"); - copy_event_system(path); + copy_event_system(path, tps); put_tracing_file(path); } -static void read_event_files(void) +static bool system_in_tp_list(char *sys, struct tracepoint_path *tps) +{ + while (tps) { + if (!strcmp(sys, tps->system)) + return true; + tps = tps->next; + } + + return false; +} + +static void read_event_files(struct tracepoint_path *tps) { struct dirent *dent; struct stat st; @@ -370,7 +396,8 @@ static void read_event_files(void) while ((dent = readdir(dir))) { if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0 || - strcmp(dent->d_name, "ftrace") == 0) + strcmp(dent->d_name, "ftrace") == 0 || + !system_in_tp_list(dent->d_name, tps)) continue; sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2); sprintf(sys, "%s/%s", path, dent->d_name); @@ -388,7 +415,8 @@ static void read_event_files(void) while ((dent = readdir(dir))) { if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0 || - strcmp(dent->d_name, "ftrace") == 0) + strcmp(dent->d_name, "ftrace") == 0 || + !system_in_tp_list(dent->d_name, tps)) continue; sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2); sprintf(sys, "%s/%s", path, dent->d_name); @@ -396,7 +424,7 @@ static void read_event_files(void) if (ret >= 0) { if (S_ISDIR(st.st_mode)) { write_or_die(dent->d_name, strlen(dent->d_name) + 1); - copy_event_system(sys); + copy_event_system(sys, tps); } } free(sys); @@ -450,9 +478,27 @@ static void read_ftrace_printk(void) } -void read_tracing_data(void) +static struct tracepoint_path * +get_tracepoints_path(struct perf_counter_attr *pattrs, int nb_counters) +{ + struct tracepoint_path path, *ppath = &path; + int i; + + for (i = 0; i < nb_counters; i++) { + if (pattrs[i].type != PERF_TYPE_TRACEPOINT) + continue; + ppath->next = tracepoint_id_to_path(pattrs[i].config); + if (!ppath->next) + die("%s\n", "No memory to alloc tracepoints list"); + ppath = ppath->next; + } + + return path.next; +} +void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters) { char buf[BUFSIZ]; + struct tracepoint_path *tps; output_fd = open(output_file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); if (output_fd < 0) @@ -483,9 +529,11 @@ void read_tracing_data(void) page_size = getpagesize(); write_or_die(&page_size, 4); + tps = get_tracepoints_path(pattrs, nb_counters); + read_header_files(); - read_ftrace_files(); - read_event_files(); + read_ftrace_files(tps); + read_event_files(tps); read_proc_kallsyms(); read_ftrace_printk(); } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index b53b27f34e4e..a6577cdd9afd 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -29,6 +29,7 @@ #include #undef _GNU_SOURCE +#include "../perf.h" #include "util.h" #include "trace-event.h" diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 1dac301ae54f..b12e4903739a 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -36,6 +36,7 @@ #include #include +#include "../perf.h" #include "util.h" #include "trace-event.h" diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 3ddb8947be8a..051fcf363825 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -1,6 +1,7 @@ -#ifndef _PARSE_EVENTS_H -#define _PARSE_EVENTS_H +#ifndef _TRACE_EVENTS_H +#define _TRACE_EVENTS_H +#include "parse-events.h" #define __unused __attribute__((unused)) @@ -233,6 +234,6 @@ extern int header_page_data_size; int parse_header_page(char *buf, unsigned long size); -void read_tracing_data(void); +void read_tracing_data(struct perf_counter_attr *pattrs, int nb_counters); -#endif /* _PARSE_EVENTS_H */ +#endif /* _TRACE_EVENTS_H */ From d498bc1f6261dd6f655440eb2f1c7fa25694d3ba Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 28 Aug 2009 04:46:07 +0200 Subject: [PATCH 35/60] perf tools: Fix missing string field printing in perf trace Some string fields are not printed because of a missing printf in the post-processing. Before: perf-10070 [000] 0.000000: sched_switch: task :10070 [120] (R) ==> :5720 [120] geany-5720 [000] 0.000000: sched_switch: task :5720 [120] (S) ==> :10070 [120] perf-10070 [000] 0.000000: sched_switch: task :10070 [120] (R) ==> :5720 [120] geany-5720 [000] 0.000000: sched_switch: task :5720 [120] (S) ==> :10070 [120] -0 [000] 0.000000: sched_switch: task :0 [140] (R) ==> :361 [115] After: perf-10070 [000] 0.000000: sched_switch: task perf:10070 [120] (R) ==> geany:5720 [120] geany-5720 [000] 0.000000: sched_switch: task geany:5720 [120] (S) ==> perf:10070 [120] perf-10070 [000] 0.000000: sched_switch: task perf:10070 [120] (R) ==> geany:5720 [120] geany-5720 [000] 0.000000: sched_switch: task geany:5720 [120] (S) ==> perf:10070 [120] -0 [000] 0.000000: sched_switch: task swapper:0 [140] (R) ==> kondemand/1:361 [115] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt LKML-Reference: <1251427567-10551-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/trace-event-parse.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index a6577cdd9afd..96c5e97ffe7b 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1997,6 +1997,7 @@ static void print_str_arg(void *data, int size, memcpy(str, data + arg->field.field->offset, arg->field.field->size); str[arg->field.field->size] = 0; + printf("%s", str); free(str); break; case PRINT_FLAGS: From 119e7a22bb70d84849384e5113792cd45afa4f85 Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 27 Aug 2009 09:59:02 +0200 Subject: [PATCH 36/60] perf tools: do not complain if root is owning perf.data This improves patch fa6963b24 so that perf.data stuff that has been dumped as root can be read (annotate/report) by a user without the use of the --force. Rationale is that root has plenty of ways to screw us (usually) that do not require twisted schemes involving specially crafting a perf.data. Signed-off-by: Pierre Habouzit Cc: Paul Mackerras Cc: Peter Zijlstra Cc: LKML-Reference: <20090827075902.GF19653@laphroaig.corp> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 4 ++-- tools/perf/builtin-report.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4ac618b34254..4c7bc4436236 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -984,8 +984,8 @@ static int __cmd_annotate(void) exit(-1); } - if (!force && (input_stat.st_uid != geteuid())) { - fprintf(stderr, "file: %s not owned by current user\n", input_name); + if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { + fprintf(stderr, "file: %s not owned by current user or root\n", input_name); exit(-1); } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d2e28820ee60..ea6328a893cc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1405,8 +1405,8 @@ static int __cmd_report(void) exit(-1); } - if (!force && (input_stat.st_uid != geteuid())) { - fprintf(stderr, "file: %s not owned by current user\n", input_name); + if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { + fprintf(stderr, "file: %s not owned by current user or root\n", input_name); exit(-1); } From ec7ba4ea1d605029fb09601ab4ad3053bc1f519c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 31 Aug 2009 03:32:03 +0200 Subject: [PATCH 37/60] perf tools: Add missing parameters documentation Add missing documentation for the following parameters: - perf record -R - perf report -g Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo LKML-Reference: <1251682323-10395-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-record.txt | 4 ++++ tools/perf/Documentation/perf-report.txt | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 6be696b0a2bb..0ff23de9e453 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -91,6 +91,10 @@ OPTIONS --no-samples:: Don't sample. +-R:: +--raw-samples:: +Collect raw sample records from all opened counters (typically for tracepoint counters). + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 370344afb5b2..59f0b846cd71 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -51,6 +51,16 @@ OPTIONS all occurances of this separator in symbol names (and other output) with a '.' character, that thus it's the only non valid separator. +-g [type,min]:: +--call-graph:: + Display callchains using type and min percent threshold. + type can be either: + - flat: single column, linear exposure of callchains. + - graph: use a graph tree, displaying absolute overhead rates. + - fractal: like graph, but displays relative rates. Each branch of + the tree is considered as a new profiled object. + + Default: fractal,0.5. + SEE ALSO -------- linkperf:perf-stat[1] From 5b447a6a13ea823b698bf4c01193654fd7ebf4ec Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 31 Aug 2009 06:45:18 +0200 Subject: [PATCH 38/60] perf tools: Librarize idle thread registration Librarize register_idle_thread() used by annotate and report. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo LKML-Reference: <1251693921-6579-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 13 +------------ tools/perf/builtin-report.c | 32 +++++++++++++++----------------- tools/perf/util/thread.c | 13 +++++++++++++ tools/perf/util/thread.h | 2 ++ 4 files changed, 31 insertions(+), 29 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4c7bc4436236..043d85b7e254 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -472,17 +472,6 @@ static void output__resort(void) } } -static void register_idle_thread(void) -{ - struct thread *thread = threads__findnew(0, &threads, &last_match); - - if (thread == NULL || - thread__set_comm(thread, "[idle]")) { - fprintf(stderr, "problem inserting idle task.\n"); - exit(-1); - } -} - static unsigned long total = 0, total_mmap = 0, total_comm = 0, @@ -970,7 +959,7 @@ static int __cmd_annotate(void) uint32_t size; char *buf; - register_idle_thread(); + register_idle_thread(&threads, &last_match); input = open(input_name, O_RDONLY); if (input < 0) { diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cdd46ab11bd5..cdf9a8d27bb9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -666,12 +666,9 @@ static void dso__calc_col_width(struct dso *self) self->slen_calculated = 1; } -static int thread__set_comm_adjust(struct thread *self, const char *comm) +static void thread__comm_adjust(struct thread *self) { - int ret = thread__set_comm(self, comm); - - if (ret) - return ret; + char *comm = self->comm; if (!col_width_list_str && !field_sep && (!comm_list || strlist__has_entry(comm_list, comm))) { @@ -682,6 +679,16 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm) threads__col_width = slen + 6; } } +} + +static int thread__set_comm_adjust(struct thread *self, const char *comm) +{ + int ret = thread__set_comm(self, comm); + + if (ret) + return ret; + + thread__comm_adjust(self); return 0; } @@ -1073,17 +1080,6 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) return ret; } -static void register_idle_thread(void) -{ - struct thread *thread = threads__findnew(0, &threads, &last_match); - - if (thread == NULL || - thread__set_comm_adjust(thread, "[idle]")) { - fprintf(stderr, "problem inserting idle task.\n"); - exit(-1); - } -} - static unsigned long total = 0, total_mmap = 0, total_comm = 0, @@ -1381,11 +1377,13 @@ static int __cmd_report(void) unsigned long offset = 0; unsigned long head, shift; struct stat input_stat; + struct thread *idle; event_t *event; uint32_t size; char *buf; - register_idle_thread(); + idle = register_idle_thread(&threads, &last_match); + thread__comm_adjust(idle); if (show_threads) perf_read_values_init(&show_threads_values); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index f98032c135c6..3acd37f54888 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -80,6 +80,19 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) return th; } +struct thread * +register_idle_thread(struct rb_root *threads, struct thread **last_match) +{ + struct thread *thread = threads__findnew(0, threads, last_match); + + if (!thread || thread__set_comm(thread, "[idle]")) { + fprintf(stderr, "problem inserting idle task.\n"); + exit(-1); + } + + return thread; +} + void thread__insert_map(struct thread *self, struct map *map) { struct map *pos, *tmp; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index b1c66719379b..634f2809a342 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -13,6 +13,8 @@ struct thread { int thread__set_comm(struct thread *self, const char *comm); struct thread * threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match); +struct thread * +register_idle_thread(struct rb_root *threads, struct thread **last_match); void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *self, struct thread *parent); struct map *thread__find_map(struct thread *self, u64 ip); From 3a2684ca58e06941ff00e3f096ca44f898a6d13e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 31 Aug 2009 06:45:19 +0200 Subject: [PATCH 39/60] perf tools: Resolve idle thread cmdline for perf trace The cmd-trace tool used the cmdline file and resolved the idle thread using a hardcoded check for the 0 task pid. Now we have a centralized way to do that from perf using register_idle_thread() API. Before: :0-0 [000] 0.000000: irq_handler_entry: irq=0 handler=name :0-0 [000] 0.000000: irq_handler_entry: irq=0 handler=name After: [idle]-0 [000] 0.000000: irq_handler_entry: irq=0 handler=name [idle]-0 [000] 0.000000: irq_handler_entry: irq=0 handler=name Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo LKML-Reference: <1251693921-6579-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index dd3c2e7c9a18..8247fd04745a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -164,6 +164,7 @@ static int __cmd_trace(void) char *buf; trace_report(); + register_idle_thread(&threads, &last_match); input = open(input_name, O_RDONLY); if (input < 0) { From 9b8055a52c8986167e0a7357460d528a00db67e6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 31 Aug 2009 06:45:20 +0200 Subject: [PATCH 40/60] perf tools: Unify swapper tasks naming In perf tools, we hardcode the pid 0 cmdline resolving to "idle" because the init task is not included in the COMM events. But the idle tasks secondary cpus are resolved into their "init" name through the COMM events. We have then such strange result in perf report (ditto with trace): 19.66% init [kernel] [k] acpi_idle_enter_c1 17.32% [idle] [kernel] [k] acpi_idle_enter_c1 It's then better to unify the swapper tasks into a single init name. Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo LKML-Reference: <1251693921-6579-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo --- tools/perf/util/thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 3acd37f54888..7635928ca278 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -85,7 +85,7 @@ register_idle_thread(struct rb_root *threads, struct thread **last_match) { struct thread *thread = threads__findnew(0, threads, last_match); - if (!thread || thread__set_comm(thread, "[idle]")) { + if (!thread || thread__set_comm(thread, "[init]")) { fprintf(stderr, "problem inserting idle task.\n"); exit(-1); } From 561f732c1233f6bf7c3c5c5fb9b4d90bb6c107aa Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 31 Aug 2009 06:45:21 +0200 Subject: [PATCH 41/60] perf tools: Complete support for dynamic strings Complete support for __str_loc type strings of ftrace events which have dynamic offsets values set for each of them inside their sammples. Before: geany-5759 [000] 0.000000: lock_release: name geany-5759 [000] 0.000000: lock_release: name geany-5759 [000] 0.000000: lock_release: name kondemand/0-362 [000] 0.000000: lock_release: name pdflush-421 [000] 0.000000: lock_release: name After: geany-5759 [000] 0.000000: lock_release: &u->lock geany-5759 [000] 0.000000: lock_release: key geany-5759 [000] 0.000000: lock_release: &group->notification_mutex kondemand/0-362 [000] 0.000000: lock_release: &rq->lock pdflush-421 [000] 0.000000: lock_release: &rq->lock Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt LKML-Reference: <1251693921-6579-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt --- tools/perf/util/trace-event-parse.c | 16 ++++++++++++++-- tools/perf/util/trace-event.h | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 96c5e97ffe7b..665dac20cd1e 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1548,6 +1548,7 @@ process_str(struct event *event __unused, struct print_arg *arg, char **tok) arg->type = PRINT_STRING; arg->string.string = token; + arg->string.offset = -1; if (read_expected(EVENT_DELIM, (char *)")") < 0) return EVENT_ERROR; @@ -2031,9 +2032,20 @@ static void print_str_arg(void *data, int size, case PRINT_TYPE: break; - case PRINT_STRING: - printf("%s", arg->string.string); + case PRINT_STRING: { + int str_offset; + + if (arg->string.offset == -1) { + struct format_field *f; + + f = find_any_field(event, arg->string.string); + arg->string.offset = f->offset; + } + str_offset = *(int *)(data + arg->string.offset); + str_offset &= 0xffff; + printf("%s", ((char *)data) + str_offset); break; + } case PRINT_OP: /* * The only op for string should be ? : diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 051fcf363825..420294a5773e 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -50,6 +50,7 @@ struct print_arg_atom { struct print_arg_string { char *string; + int offset; }; struct print_arg_field { From 61562445c80452ec417fb6a6895b991f6c1dd930 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Sep 2009 14:50:23 +0200 Subject: [PATCH 42/60] perf tools: Clean up warnings list in the Makefile Make it easier to turn warnings on/off by using a separate line for each warning added. Some of the warnings have too much of a nuisance factor and we might want to turn them off in the future. Cc: Arjan van de Ven Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index c481a513f551..9f8d207a91bf 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -169,7 +169,30 @@ endif # # Include saner warnings here, which can catch bugs: # -EXTRA_WARNINGS = -Wcast-align -Wformat -Wformat-security -Wformat-y2k -Wshadow -Winit-self -Wpacked -Wredundant-decls -Wstack-protector -Wstrict-aliasing=3 -Wswitch-default -Wswitch-enum -Wno-system-headers -Wundef -Wvolatile-register-var -Wwrite-strings -Wbad-function-cast -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wstrict-prototypes -Wdeclaration-after-statement + +EXTRA_WARNINGS := -Wcast-align +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wformat +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wformat-security +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wformat-y2k +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wshadow +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Winit-self +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wpacked +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wredundant-decls +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstack-protector +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-aliasing=3 +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wswitch-default +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wswitch-enum +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wno-system-headers +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wundef +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wvolatile-register-var +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wwrite-strings +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wbad-function-cast +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wmissing-declarations +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wmissing-prototypes +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wnested-externs +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wold-style-definition +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-prototypes +EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wdeclaration-after-statement CFLAGS = $(M64) -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -fstack-protector-all -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) LDFLAGS = -lpthread -lrt -lelf -lm From 65014ab36196f6d86edc9ee23759d6930b9d89a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Sep 2009 14:55:55 +0200 Subject: [PATCH 43/60] perf tools: Work around strict aliasing related warnings Older versions of GCC are rather stupid about strict aliasing: util/trace-event-parse.c: In function 'parse_cmdlines': util/trace-event-parse.c:93: warning: dereferencing type-punned pointer will break strict-aliasing rules util/trace-event-parse.c: In function 'parse_proc_kallsyms': util/trace-event-parse.c:155: warning: dereferencing type-punned pointer will break strict-aliasing rules util/trace-event-parse.c:157: warning: dereferencing type-punned pointer will break strict-aliasing rules util/trace-event-parse.c:158: warning: dereferencing type-punned pointer will break strict-aliasing rules util/trace-event-parse.c: In function 'parse_ftrace_printk': util/trace-event-parse.c:294: warning: dereferencing type-punned pointer will break strict-aliasing rules util/trace-event-parse.c:295: warning: dereferencing type-punned pointer will break strict-aliasing rules make: *** [util/trace-event-parse.o] Error 1 Make it clear to GCC that we intend with those pointers, by passing them through via an explicit (void *) cast. We might want to add -fno-strict-aliasing as well, like the kernel itself does. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/trace-event-info.c | 2 +- tools/perf/util/trace-event-parse.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 81615279b876..6c9302a7274c 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -188,7 +188,7 @@ int bigendian(void) unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0}; unsigned int *ptr; - ptr = (unsigned int *)str; + ptr = (unsigned int *)(void *)str; return *ptr == 0x01020304; } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 665dac20cd1e..37b10c2cd3c9 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -90,7 +90,7 @@ void parse_cmdlines(char *file, int size __unused) while (line) { item = malloc_or_die(sizeof(*item)); sscanf(line, "%d %as", &item->pid, - (float *)&item->comm); /* workaround gcc warning */ + (float *)(void *)&item->comm); /* workaround gcc warning */ item->next = list; list = item; line = strtok_r(NULL, "\n", &next); @@ -152,10 +152,10 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused) item = malloc_or_die(sizeof(*item)); item->mod = NULL; ret = sscanf(line, "%as %c %as\t[%as", - (float *)&addr_str, /* workaround gcc warning */ + (float *)(void *)&addr_str, /* workaround gcc warning */ &ch, - (float *)&item->func, - (float *)&item->mod); + (float *)(void *)&item->func, + (float *)(void *)&item->mod); item->addr = strtoull(addr_str, NULL, 16); free(addr_str); @@ -291,8 +291,8 @@ void parse_ftrace_printk(char *file, unsigned int size __unused) while (line) { item = malloc_or_die(sizeof(*item)); ret = sscanf(line, "%as : %as", - (float *)&addr_str, /* workaround gcc warning */ - (float *)&item->printk); + (float *)(void *)&addr_str, /* workaround gcc warning */ + (float *)(void *)&item->printk); item->addr = strtoull(addr_str, NULL, 16); free(addr_str); From cd6feeeafddbef6abfe4d90fb26e42fd844d34ed Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Sep 2009 20:20:38 +0200 Subject: [PATCH 44/60] perf trace: Sample the CPU too Sample, record, parse and print the CPU field - it had all zeroes before. Before (watch the second column, the CPU values): perf-32685 [000] 0.000000: sched_wakeup_new: task perf:32686 [120] success=1 [011] perf-32685 [000] 0.000000: sched_migrate_task: task perf:32685 [120] from: 1 to: 11 perf-32685 [000] 0.000000: sched_process_fork: parent perf:32685 child perf:32686 true-32686 [000] 0.000000: sched_wakeup: task migration/11:25 [0] success=1 [011] true-32686 [000] 0.000000: sched_wakeup: task distccd:12793 [125] success=1 [015] true-32686 [000] 0.000000: sched_wakeup: task distccd:12793 [125] success=1 [015] perf-32685 [000] 0.000000: sched_switch: task perf:32685 [120] (S) ==> swapper:0 [140] true-32686 [000] 0.000000: sched_switch: task perf:32686 [120] (R) ==> migration/11:25 [0] true-32686 [000] 0.000000: sched_switch: task perf:32686 [120] (R) ==> distccd:12793 [125] true-32686 [000] 0.000000: sched_switch: task true:32686 [120] (R) ==> distccd:12793 [125] true-32686 [000] 0.000000: sched_process_exit: task true:32686 [120] true-32686 [000] 0.000000: sched_stat_wait: task: distccd:12793 wait: 6767985949080 [ns] true-32686 [000] 0.000000: sched_stat_wait: task: distccd:12793 wait: 6767986139446 [ns] true-32686 [000] 0.000000: sched_stat_sleep: task: distccd:12793 sleep: 132844 [ns] true-32686 [000] 0.000000: sched_stat_sleep: task: distccd:12793 sleep: 131724 [ns] After: perf-32685 [001] 0.000000: sched_wakeup_new: task perf:32686 [120] success=1 [011] perf-32685 [001] 0.000000: sched_migrate_task: task perf:32685 [120] from: 1 to: 11 perf-32685 [001] 0.000000: sched_process_fork: parent perf:32685 child perf:32686 true-32686 [011] 0.000000: sched_wakeup: task migration/11:25 [0] success=1 [011] true-32686 [015] 0.000000: sched_wakeup: task distccd:12793 [125] success=1 [015] true-32686 [015] 0.000000: sched_wakeup: task distccd:12793 [125] success=1 [015] perf-32685 [001] 0.000000: sched_switch: task perf:32685 [120] (S) ==> swapper:0 [140] true-32686 [011] 0.000000: sched_switch: task perf:32686 [120] (R) ==> migration/11:25 [0] true-32686 [015] 0.000000: sched_switch: task perf:32686 [120] (R) ==> distccd:12793 [125] true-32686 [015] 0.000000: sched_switch: task true:32686 [120] (R) ==> distccd:12793 [125] true-32686 [015] 0.000000: sched_process_exit: task true:32686 [120] true-32686 [015] 0.000000: sched_stat_wait: task: distccd:12793 wait: 6767985949080 [ns] true-32686 [015] 0.000000: sched_stat_wait: task: distccd:12793 wait: 6767986139446 [ns] true-32686 [015] 0.000000: sched_stat_sleep: task: distccd:12793 sleep: 132844 [ns] true-32686 [015] 0.000000: sched_stat_sleep: task: distccd:12793 sleep: 131724 [ns] So we can now see how this workload migrated between CPUs. Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Li Zefan Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 4 +++- tools/perf/builtin-trace.c | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index add514d53d2e..ff93f8ecba28 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -403,8 +403,10 @@ static void create_counter(int counter, int cpu, pid_t pid) if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; - if (raw_samples) + if (raw_samples) { attr->sample_type |= PERF_SAMPLE_RAW; + attr->sample_type |= PERF_SAMPLE_CPU; + } attr->mmap = track; attr->comm = track; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8247fd04745a..bbe4c444ef8f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -58,12 +58,19 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) struct dso *dso = NULL; struct thread *thread; u64 ip = event->ip.ip; + u32 cpu = -1; u64 period = 1; void *more_data = event->ip.__more_data; int cpumode; thread = threads__findnew(event->ip.pid, &threads, &last_match); + if (sample_type & PERF_SAMPLE_CPU) { + cpu = *(u32 *)more_data; + more_data += sizeof(u32); + more_data += sizeof(u32); /* reserved */ + } + if (sample_type & PERF_SAMPLE_PERIOD) { period = *(u64 *)more_data; more_data += sizeof(u64); @@ -120,7 +127,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) * field, although it should be the same than this perf * event pid */ - print_event(0, raw->data, raw->size, 0, thread->comm); + print_event(cpu, raw->data, raw->size, 0, thread->comm); } total += period; From 0fbdea19e9394a5cb5f2f5081b028c50b558910a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 Sep 2009 21:46:00 +0200 Subject: [PATCH 45/60] perf_counter: Introduce new (non-)paranoia level to allow raw tracepoint access I want to sample inherited tracepoint workloads as a normal user and the CAP_SYS_ADMIN check prevents me from doing that right now. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index d988dfb4bbab..0aa609f69103 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -46,12 +46,18 @@ static atomic_t nr_task_counters __read_mostly; /* * perf counter paranoia level: - * 0 - not paranoid - * 1 - disallow cpu counters to unpriv - * 2 - disallow kernel profiling to unpriv + * -1 - not paranoid at all + * 0 - disallow raw tracepoint access for unpriv + * 1 - disallow cpu counters for unpriv + * 2 - disallow kernel profiling for unpriv */ int sysctl_perf_counter_paranoid __read_mostly = 1; +static inline bool perf_paranoid_tracepoint_raw(void) +{ + return sysctl_perf_counter_paranoid > -1; +} + static inline bool perf_paranoid_cpu(void) { return sysctl_perf_counter_paranoid > 0; @@ -3971,6 +3977,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) * have these. */ if ((counter->attr.sample_type & PERF_SAMPLE_RAW) && + perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); From 6ddf259da76cab6555c2086386f8bcd10bbb86d2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 3 Sep 2009 12:00:22 +0200 Subject: [PATCH 46/60] perf trace: Sample timestamps as well Before: perf-21082 [013] 0.000000: sched_wakeup_new: task perf:21083 [120] success=1 [015] perf-21082 [013] 0.000000: sched_migrate_task: task perf:21082 [120] from: 13 to: 15 perf-21082 [013] 0.000000: sched_process_fork: parent perf:21082 child perf:21083 true-21083 [015] 0.000000: sched_wakeup: task migration/15:33 [0] success=1 [015] perf-21082 [013] 0.000000: sched_switch: task perf:21082 [120] (S) ==> swapper:0 [140] true-21083 [015] 0.000000: sched_switch: task perf:21083 [120] (R) ==> migration/15:33 [0] true-21083 [011] 0.000000: sched_process_exit: task true:21083 [120] After: perf-21082 [013] 14674.797613: sched_wakeup_new: task perf:21083 [120] success=1 [015] perf-21082 [013] 14674.797506: sched_migrate_task: task perf:21082 [120] from: 13 to: 15 perf-21082 [013] 14674.797610: sched_process_fork: parent perf:21082 child perf:21083 true-21083 [015] 14674.797725: sched_wakeup: task migration/15:33 [0] success=1 [015] perf-21082 [013] 14674.797722: sched_switch: task perf:21082 [120] (S) ==> swapper:0 [140] true-21083 [015] 14674.797729: sched_switch: task perf:21083 [120] (R) ==> migration/15:33 [0] true-21083 [011] 14674.798159: sched_process_exit: task true:21083 [120] Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 1 + tools/perf/builtin-trace.c | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ff93f8ecba28..99a12fe86e9f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -404,6 +404,7 @@ static void create_counter(int counter, int cpu, pid_t pid) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; if (raw_samples) { + attr->sample_type |= PERF_SAMPLE_TIME; attr->sample_type |= PERF_SAMPLE_RAW; attr->sample_type |= PERF_SAMPLE_CPU; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bbe4c444ef8f..d59bf8a86743 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -58,6 +58,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) struct dso *dso = NULL; struct thread *thread; u64 ip = event->ip.ip; + u64 timestamp = -1; u32 cpu = -1; u64 period = 1; void *more_data = event->ip.__more_data; @@ -65,6 +66,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->ip.pid, &threads, &last_match); + if (sample_type & PERF_SAMPLE_TIME) { + timestamp = *(u64 *)more_data; + more_data += sizeof(u64); + } + if (sample_type & PERF_SAMPLE_CPU) { cpu = *(u32 *)more_data; more_data += sizeof(u32); @@ -127,7 +133,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) * field, although it should be the same than this perf * event pid */ - print_event(cpu, raw->data, raw->size, 0, thread->comm); + print_event(cpu, raw->data, raw->size, timestamp, thread->comm); } total += period; From 8886f42d6d8dabeb488c706c339634a0e3e08df4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 3 Sep 2009 16:19:57 +0200 Subject: [PATCH 47/60] perf trace: Fix parsing of perf.data We started parsing perf.data at head 0. This caused -D to segfault and it could possibly also case incorrect trace entries to be displayed. Parse it at data_offset instead. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d59bf8a86743..914ab366e369 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -196,6 +196,7 @@ static int __cmd_trace(void) exit(0); } header = perf_header__read(input); + head = header->data_offset; sample_type = perf_header__sample_type(header); if (!(sample_type & PERF_SAMPLE_RAW)) From 2e01d1791168bb824226d8cc70e50374767dcc42 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 3 Sep 2009 16:21:11 +0200 Subject: [PATCH 48/60] perf tools: Seek to the end of the header area Leave the input fd at the data area. It does not matter right now - but seeking at the end of it certainly did not make sense. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a37a2221a0c3..ec4d4c2f9522 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -237,7 +237,7 @@ struct perf_header *perf_header__read(int fd) self->data_offset = f_header.data.offset; self->data_size = f_header.data.size; - lseek(fd, self->data_offset + self->data_size, SEEK_SET); + lseek(fd, self->data_offset, SEEK_SET); self->frozen = 1; From 00fc97863c21c41e257a941e83410c56341e2a5d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 3 Sep 2009 16:22:02 +0200 Subject: [PATCH 49/60] perf trace: Print out in nanoseconds Print out more accurate timestamps - usecs does not cut it anymore on fast enough boxes ;-) Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/trace-event-parse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 37b10c2cd3c9..629e602d9405 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -2708,9 +2708,9 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, return pretty_print_func_graph(data, size, event, cpu, pid, comm, secs, usecs); - printf("%16s-%-5d [%03d] %5lu.%06lu: %s: ", + printf("%16s-%-5d [%03d] %5lu.%09Lu: %s: ", comm, pid, cpu, - secs, usecs, event->name); + secs, nsecs, event->name); pretty_print(data, size, event); printf("\n"); From 6f4596d9312ba5fbf5f3231ef484823c4e684d2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 3 Sep 2009 16:22:45 +0200 Subject: [PATCH 50/60] perf trace: Fix read_string() We did not account for the enclosing \0. Depending on what malloc() gave us this resulted in corrupted version string printouts. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/trace-event-read.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index b12e4903739a..a1217a10632f 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -113,8 +113,11 @@ static char *read_string(void) } } + /* trailing \0: */ + i++; + /* move the file descriptor to the end of the string */ - r = lseek(input_fd, -(r - (i+1)), SEEK_CUR); + r = lseek(input_fd, -(r - i), SEEK_CUR); if (r < 0) die("lseek"); From dc86cabe4b242446ea9aa8492c727e1220817898 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 3 Sep 2009 18:03:00 +0200 Subject: [PATCH 51/60] perf_counter: Fix output-sharing error path We forget to release the fd in the PERF_FLAG_FD_OUTPUT error path. Reorganize the error flow here to be a clean fall-through logic. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_counter.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0aa609f69103..e0d91fdf0c3c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -4316,15 +4316,15 @@ SYSCALL_DEFINE5(perf_counter_open, struct file *group_file = NULL; int fput_needed = 0; int fput_needed2 = 0; - int ret; + int err; /* for future expandability... */ if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) return -EINVAL; - ret = perf_copy_attr(attr_uptr, &attr); - if (ret) - return ret; + err = perf_copy_attr(attr_uptr, &attr); + if (err) + return err; if (!attr.exclude_kernel) { if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) @@ -4348,7 +4348,7 @@ SYSCALL_DEFINE5(perf_counter_open, */ group_leader = NULL; if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { - ret = -EINVAL; + err = -EINVAL; group_file = fget_light(group_fd, &fput_needed); if (!group_file) goto err_put_context; @@ -4377,22 +4377,22 @@ SYSCALL_DEFINE5(perf_counter_open, counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, NULL, GFP_KERNEL); - ret = PTR_ERR(counter); + err = PTR_ERR(counter); if (IS_ERR(counter)) goto err_put_context; - ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); - if (ret < 0) + err = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); + if (err < 0) goto err_free_put_context; - counter_file = fget_light(ret, &fput_needed2); + counter_file = fget_light(err, &fput_needed2); if (!counter_file) goto err_free_put_context; if (flags & PERF_FLAG_FD_OUTPUT) { - ret = perf_counter_set_output(counter, group_fd); - if (ret) - goto err_free_put_context; + err = perf_counter_set_output(counter, group_fd); + if (err) + goto err_fput_free_put_context; } counter->filp = counter_file; @@ -4408,20 +4408,20 @@ SYSCALL_DEFINE5(perf_counter_open, list_add_tail(&counter->owner_entry, ¤t->perf_counter_list); mutex_unlock(¤t->perf_counter_mutex); +err_fput_free_put_context: fput_light(counter_file, fput_needed2); -out_fput: - fput_light(group_file, fput_needed); - - return ret; - err_free_put_context: - kfree(counter); + if (err < 0) + kfree(counter); err_put_context: - put_ctx(ctx); + if (err < 0) + put_ctx(ctx); - goto out_fput; + fput_light(group_file, fput_needed); + + return err; } /* From 747b50aaf728987732e6ff3ba10aba4acc4e0277 Mon Sep 17 00:00:00 2001 From: "markus.t.metzger@intel.com" Date: Wed, 2 Sep 2009 16:04:46 +0200 Subject: [PATCH 52/60] x86, perf_counter, bts: Fail if BTS is not available Reserve PERF_COUNT_HW_BRANCH_INSTRUCTIONS with sample_period == 1 for BTS tracing and fail, if BTS is not available. Signed-off-by: Markus Metzger Acked-by: Peter Zijlstra LKML-Reference: <20090902140612.943801000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 396e35db7058..2f41874ffb86 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -769,7 +769,7 @@ static int reserve_bts_hardware(void) int cpu, err = 0; if (!bts_available()) - return -EOPNOTSUPP; + return 0; get_online_cpus(); @@ -914,7 +914,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (!reserve_pmc_hardware()) err = -EBUSY; else - reserve_bts_hardware(); + err = reserve_bts_hardware(); } if (!err) atomic_inc(&active_counters); @@ -979,6 +979,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (config == -1LL) return -EINVAL; + /* + * Branch tracing: + */ + if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && + (hwc->sample_period == 1) && !bts_available()) + return -EOPNOTSUPP; + hwc->config |= config; return 0; @@ -1355,19 +1362,9 @@ static int x86_pmu_enable(struct perf_counter *counter) idx = fixed_mode_idx(counter, hwc); if (idx == X86_PMC_IDX_FIXED_BTS) { - /* - * Try to use BTS for branch tracing. If that is not - * available, try to get a generic counter. - */ - if (unlikely(!cpuc->ds)) - goto try_generic; - - /* - * Try to get the fixed counter, if that is already taken - * then try to get a generic counter: - */ + /* BTS is already occupied. */ if (test_and_set_bit(idx, cpuc->used_mask)) - goto try_generic; + return -EAGAIN; hwc->config_base = 0; hwc->counter_base = 0; From 596da17f94c103348ebe04129c00d536ea0e80e2 Mon Sep 17 00:00:00 2001 From: "markus.t.metzger@intel.com" Date: Wed, 2 Sep 2009 16:04:47 +0200 Subject: [PATCH 53/60] x86, perf_counter, bts: Correct pointer-to-u64 casts On 32bit, pointers in the DS AREA configuration are cast to u64. The current (long) cast to avoid compiler warnings results in a signed 64bit address. Signed-off-by: Markus Metzger Acked-by: Peter Zijlstra LKML-Reference: <20090902140615.305889000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 2f41874ffb86..3776b0b630c8 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -726,7 +726,8 @@ static inline void init_debug_store_on_cpu(int cpu) return; wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, - (u32)((u64)(long)ds), (u32)((u64)(long)ds >> 32)); + (u32)((u64)(unsigned long)ds), + (u32)((u64)(unsigned long)ds >> 32)); } static inline void fini_debug_store_on_cpu(int cpu) @@ -757,7 +758,7 @@ static void release_bts_hardware(void) per_cpu(cpu_hw_counters, cpu).ds = NULL; - kfree((void *)(long)ds->bts_buffer_base); + kfree((void *)(unsigned long)ds->bts_buffer_base); kfree(ds); } @@ -788,7 +789,7 @@ static int reserve_bts_hardware(void) break; } - ds->bts_buffer_base = (u64)(long)buffer; + ds->bts_buffer_base = (u64)(unsigned long)buffer; ds->bts_index = ds->bts_buffer_base; ds->bts_absolute_maximum = ds->bts_buffer_base + BTS_BUFFER_SIZE; @@ -1491,7 +1492,7 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, }; struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; unsigned long orig_ip = data->regs->ip; - u64 at; + struct bts_record *at, *top; if (!counter) return; @@ -1499,19 +1500,18 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, if (!ds) return; - for (at = ds->bts_buffer_base; - at < ds->bts_index; - at += sizeof(struct bts_record)) { - struct bts_record *rec = (struct bts_record *)(long)at; + at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; - data->regs->ip = rec->from; - data->addr = rec->to; + ds->bts_index = ds->bts_buffer_base; + + for (; at < top; at++) { + data->regs->ip = at->from; + data->addr = at->to; perf_counter_output(counter, 1, data); } - ds->bts_index = ds->bts_buffer_base; - data->regs->ip = orig_ip; data->addr = 0; From 1653192f510bd8114b7b133d7289e6e5c3e95046 Mon Sep 17 00:00:00 2001 From: "markus.t.metzger@intel.com" Date: Wed, 2 Sep 2009 16:04:48 +0200 Subject: [PATCH 54/60] x86, perf_counter, bts: Do not allow kernel BTS tracing for now Kernel BTS tracing generates too much data too fast for us to handle, causing the kernel to hang. Fail for BTS requests for kernel code. Signed-off-by: Markus Metzger Acked-by: Peter Zijlstra LKML-Reference: <20090902140616.901253000@intel.com> [ This is really a workaround - but we want BTS tracing in .32 so make sure we dont regress. The lockup should be fixed ASAP. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 3776b0b630c8..f9cd0849bd42 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -984,8 +984,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) * Branch tracing: */ if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && - (hwc->sample_period == 1) && !bts_available()) - return -EOPNOTSUPP; + (hwc->sample_period == 1)) { + /* BTS is not supported by this architecture. */ + if (!bts_available()) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + return -EOPNOTSUPP; + } hwc->config |= config; From 506d4bc8d5dab20d84624aa07cdc6dcd77915d52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 4 Sep 2009 15:36:12 +0200 Subject: [PATCH 55/60] perf stat: Change noise calculation to use stddev The current noise computation does: \Sum abs(n_i - avg(n)) * N^-1.5 Which is (afaik) not a regular noise function, and needs the complete sample set available to post-process. Change this to use a regular stddev computation which can be done by keeping a two sums: stddev = sqrt( 1/N (\Sum n_i^2) - avg(n)^2 ) For which we only need to keep \Sum n_i and \Sum n_i^2. Signed-off-by: Peter Zijlstra Cc: LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 170 ++++++++++++++++---------------------- 1 file changed, 69 insertions(+), 101 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1a2626230660..31ffc4d3ba60 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -83,19 +83,32 @@ static u64 runtime_cycles[MAX_RUN]; static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; -static u64 event_res_avg[MAX_COUNTERS][3]; -static u64 event_res_noise[MAX_COUNTERS][3]; +struct stats +{ + double sum; + double sum_sq; +}; -static u64 event_scaled_avg[MAX_COUNTERS]; +static double avg_stats(struct stats *stats) +{ + return stats->sum / run_count; +} -static u64 runtime_nsecs_avg; -static u64 runtime_nsecs_noise; +/* + * stddev = sqrt(1/N (\Sum n_i^2) - avg(n)^2) + */ +static double stddev_stats(struct stats *stats) +{ + double avg = stats->sum / run_count; -static u64 walltime_nsecs_avg; -static u64 walltime_nsecs_noise; + return sqrt(stats->sum_sq/run_count - avg*avg); +} -static u64 runtime_cycles_avg; -static u64 runtime_cycles_noise; +struct stats event_res_stats[MAX_COUNTERS][3]; +struct stats event_scaled_stats[MAX_COUNTERS]; +struct stats runtime_nsecs_stats; +struct stats walltime_nsecs_stats; +struct stats runtime_cycles_stats; #define MATCH_EVENT(t, c, counter) \ (attrs[counter].type == PERF_TYPE_##t && \ @@ -279,42 +292,37 @@ static int run_perf_stat(int argc __used, const char **argv) return WEXITSTATUS(status); } -static void print_noise(u64 *count, u64 *noise) +static void print_noise(double avg, double stddev) { if (run_count > 1) - fprintf(stderr, " ( +- %7.3f%% )", - (double)noise[0]/(count[0]+1)*100.0); + fprintf(stderr, " ( +- %7.3f%% )", 100*stddev / avg); } -static void nsec_printout(int counter, u64 *count, u64 *noise) +static void nsec_printout(int counter, double avg, double stddev) { - double msecs = (double)count[0] / 1000000; + double msecs = avg / 1e6; fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { - if (walltime_nsecs_avg) - fprintf(stderr, " # %10.3f CPUs ", - (double)count[0] / (double)walltime_nsecs_avg); + fprintf(stderr, " # %10.3f CPUs ", + avg / avg_stats(&walltime_nsecs_stats)); } - print_noise(count, noise); + print_noise(avg, stddev); } -static void abs_printout(int counter, u64 *count, u64 *noise) +static void abs_printout(int counter, double avg, double stddev) { - fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); + fprintf(stderr, " %14.0f %-24s", avg, event_name(counter)); - if (runtime_cycles_avg && - MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { + if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { fprintf(stderr, " # %10.3f IPC ", - (double)count[0] / (double)runtime_cycles_avg); + avg / avg_stats(&runtime_cycles_stats)); } else { - if (runtime_nsecs_avg) { - fprintf(stderr, " # %10.3f M/sec", - (double)count[0]/runtime_nsecs_avg*1000.0); - } + fprintf(stderr, " # %10.3f M/sec", + 1000.0 * avg / avg_stats(&runtime_nsecs_stats)); } - print_noise(count, noise); + print_noise(avg, stddev); } /* @@ -322,12 +330,12 @@ static void abs_printout(int counter, u64 *count, u64 *noise) */ static void print_counter(int counter) { - u64 *count, *noise; + double avg, stddev; int scaled; - count = event_res_avg[counter]; - noise = event_res_noise[counter]; - scaled = event_scaled_avg[counter]; + avg = avg_stats(&event_res_stats[counter][0]); + stddev = stddev_stats(&event_res_stats[counter][0]); + scaled = avg_stats(&event_scaled_stats[counter]); if (scaled == -1) { fprintf(stderr, " %14s %-24s\n", @@ -336,36 +344,34 @@ static void print_counter(int counter) } if (nsec_counter(counter)) - nsec_printout(counter, count, noise); + nsec_printout(counter, avg, stddev); else - abs_printout(counter, count, noise); + abs_printout(counter, avg, stddev); + + if (scaled) { + double avg_enabled, avg_running; + + avg_enabled = avg_stats(&event_res_stats[counter][1]); + avg_running = avg_stats(&event_res_stats[counter][2]); - if (scaled) fprintf(stderr, " (scaled from %.2f%%)", - (double) count[2] / count[1] * 100); + 100 * avg_running / avg_enabled); + } fprintf(stderr, "\n"); } -/* - * normalize_noise noise values down to stddev: - */ -static void normalize_noise(u64 *val) +static void update_stats(const char *name, int idx, struct stats *stats, u64 *val) { - double res; + double sq = *val; - res = (double)*val / (run_count * sqrt((double)run_count)); - - *val = (u64)res; -} - -static void update_avg(const char *name, int idx, u64 *avg, u64 *val) -{ - *avg += *val; + stats->sum += *val; + stats->sum_sq += sq * sq; if (verbose > 1) fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); } + /* * Calculate the averages and noises: */ @@ -377,61 +383,22 @@ static void calc_avg(void) fprintf(stderr, "\n"); for (i = 0; i < run_count; i++) { - update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i); - update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); - update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); + update_stats("runtime", 0, &runtime_nsecs_stats, runtime_nsecs + i); + update_stats("walltime", 0, &walltime_nsecs_stats, walltime_nsecs + i); + update_stats("runtime_cycles", 0, &runtime_cycles_stats, runtime_cycles + i); for (j = 0; j < nr_counters; j++) { - update_avg("counter/0", j, - event_res_avg[j]+0, event_res[i][j]+0); - update_avg("counter/1", j, - event_res_avg[j]+1, event_res[i][j]+1); - update_avg("counter/2", j, - event_res_avg[j]+2, event_res[i][j]+2); + update_stats("counter/0", j, + event_res_stats[j]+0, event_res[i][j]+0); + update_stats("counter/1", j, + event_res_stats[j]+1, event_res[i][j]+1); + update_stats("counter/2", j, + event_res_stats[j]+2, event_res[i][j]+2); if (event_scaled[i][j] != (u64)-1) - update_avg("scaled", j, - event_scaled_avg + j, event_scaled[i]+j); - else - event_scaled_avg[j] = -1; + update_stats("scaled", j, + event_scaled_stats + j, event_scaled[i]+j); } } - runtime_nsecs_avg /= run_count; - walltime_nsecs_avg /= run_count; - runtime_cycles_avg /= run_count; - - for (j = 0; j < nr_counters; j++) { - event_res_avg[j][0] /= run_count; - event_res_avg[j][1] /= run_count; - event_res_avg[j][2] /= run_count; - } - - for (i = 0; i < run_count; i++) { - runtime_nsecs_noise += - abs((s64)(runtime_nsecs[i] - runtime_nsecs_avg)); - walltime_nsecs_noise += - abs((s64)(walltime_nsecs[i] - walltime_nsecs_avg)); - runtime_cycles_noise += - abs((s64)(runtime_cycles[i] - runtime_cycles_avg)); - - for (j = 0; j < nr_counters; j++) { - event_res_noise[j][0] += - abs((s64)(event_res[i][j][0] - event_res_avg[j][0])); - event_res_noise[j][1] += - abs((s64)(event_res[i][j][1] - event_res_avg[j][1])); - event_res_noise[j][2] += - abs((s64)(event_res[i][j][2] - event_res_avg[j][2])); - } - } - - normalize_noise(&runtime_nsecs_noise); - normalize_noise(&walltime_nsecs_noise); - normalize_noise(&runtime_cycles_noise); - - for (j = 0; j < nr_counters; j++) { - normalize_noise(&event_res_noise[j][0]); - normalize_noise(&event_res_noise[j][1]); - normalize_noise(&event_res_noise[j][2]); - } } static void print_stat(int argc, const char **argv) @@ -458,10 +425,11 @@ static void print_stat(int argc, const char **argv) fprintf(stderr, "\n"); fprintf(stderr, " %14.9f seconds time elapsed", - (double)walltime_nsecs_avg/1e9); + avg_stats(&walltime_nsecs_stats)/1e9); if (run_count > 1) { fprintf(stderr, " ( +- %7.3f%% )", - 100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg); + 100*stddev_stats(&walltime_nsecs_stats) / + avg_stats(&walltime_nsecs_stats)); } fprintf(stderr, "\n\n"); } From 9e9772c458d50dabdb5327821da3803254638cd1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 4 Sep 2009 15:36:08 +0200 Subject: [PATCH 56/60] perf stat: Remove the limit on repeat Since we don't need all the individual samples to calculate the error remove both the limit and the storage overhead associated with that. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 84 +++++++++++++-------------------------- 1 file changed, 27 insertions(+), 57 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 31ffc4d3ba60..9c6377f7152f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -62,8 +62,6 @@ static struct perf_counter_attr default_attrs[] = { }; -#define MAX_RUN 100 - static int system_wide = 0; static unsigned int nr_cpus = 0; static int run_idx = 0; @@ -76,12 +74,8 @@ static int null_run = 0; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static u64 runtime_nsecs[MAX_RUN]; -static u64 walltime_nsecs[MAX_RUN]; -static u64 runtime_cycles[MAX_RUN]; - -static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; -static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; +static u64 event_res[MAX_COUNTERS][3]; +static u64 event_scaled[MAX_COUNTERS]; struct stats { @@ -89,6 +83,14 @@ struct stats double sum_sq; }; +static void update_stats(struct stats *stats, u64 val) +{ + double sq = val; + + stats->sum += val; + stats->sum_sq += sq * sq; +} + static double avg_stats(struct stats *stats) { return stats->sum / run_count; @@ -167,8 +169,9 @@ static void read_counter(int counter) unsigned int cpu; size_t res, nv; int scaled; + int i; - count = event_res[run_idx][counter]; + count = event_res[counter]; count[0] = count[1] = count[2] = 0; @@ -193,24 +196,33 @@ static void read_counter(int counter) scaled = 0; if (scale) { if (count[2] == 0) { - event_scaled[run_idx][counter] = -1; + event_scaled[counter] = -1; count[0] = 0; return; } if (count[2] < count[1]) { - event_scaled[run_idx][counter] = 1; + event_scaled[counter] = 1; count[0] = (unsigned long long) ((double)count[0] * count[1] / count[2] + 0.5); } } + + for (i = 0; i < 3; i++) + update_stats(&event_res_stats[counter][i], count[i]); + + if (verbose) { + fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), + count[0], count[1], count[2]); + } + /* * Save the full runtime - to allow normalization during printout: */ if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) - runtime_nsecs[run_idx] = count[0]; + update_stats(&runtime_nsecs_stats, count[0]); if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) - runtime_cycles[run_idx] = count[0]; + update_stats(&runtime_cycles_stats, count[0]); } static int run_perf_stat(int argc __used, const char **argv) @@ -284,7 +296,7 @@ static int run_perf_stat(int argc __used, const char **argv) t1 = rdclock(); - walltime_nsecs[run_idx] = t1 - t0; + update_stats(&walltime_nsecs_stats, t1 - t0); for (counter = 0; counter < nr_counters; counter++) read_counter(counter); @@ -361,52 +373,10 @@ static void print_counter(int counter) fprintf(stderr, "\n"); } -static void update_stats(const char *name, int idx, struct stats *stats, u64 *val) -{ - double sq = *val; - - stats->sum += *val; - stats->sum_sq += sq * sq; - - if (verbose > 1) - fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); -} - -/* - * Calculate the averages and noises: - */ -static void calc_avg(void) -{ - int i, j; - - if (verbose > 1) - fprintf(stderr, "\n"); - - for (i = 0; i < run_count; i++) { - update_stats("runtime", 0, &runtime_nsecs_stats, runtime_nsecs + i); - update_stats("walltime", 0, &walltime_nsecs_stats, walltime_nsecs + i); - update_stats("runtime_cycles", 0, &runtime_cycles_stats, runtime_cycles + i); - - for (j = 0; j < nr_counters; j++) { - update_stats("counter/0", j, - event_res_stats[j]+0, event_res[i][j]+0); - update_stats("counter/1", j, - event_res_stats[j]+1, event_res[i][j]+1); - update_stats("counter/2", j, - event_res_stats[j]+2, event_res[i][j]+2); - if (event_scaled[i][j] != (u64)-1) - update_stats("scaled", j, - event_scaled_stats + j, event_scaled[i]+j); - } - } -} - static void print_stat(int argc, const char **argv) { int i, counter; - calc_avg(); - fflush(stdout); fprintf(stderr, "\n"); @@ -484,7 +454,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) usage_with_options(stat_usage, options); - if (run_count <= 0 || run_count > MAX_RUN) + if (run_count <= 0) usage_with_options(stat_usage, options); /* Set attrs and nr_counters if no event is selected and !null_run */ From 63d40deb2e7c64ed55943d49f078e09fc4b64b54 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 4 Sep 2009 17:03:13 +0200 Subject: [PATCH 57/60] perf stat: Use stddev_mean in stead of stddev When we're computing the mean by sampling the distribution, then the std dev of the mean is related to the std dev of the sample set by: stddev_mean = std_dev / sqrt(N) Which is exactly what we want. This results in the error on the mean decreasing with increasing number of samples. Also fix the scaled == -1, aka not counted case. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9c6377f7152f..e9424fa72420 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -75,7 +75,7 @@ static int null_run = 0; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; static u64 event_res[MAX_COUNTERS][3]; -static u64 event_scaled[MAX_COUNTERS]; +static int event_scaled[MAX_COUNTERS]; struct stats { @@ -97,17 +97,31 @@ static double avg_stats(struct stats *stats) } /* - * stddev = sqrt(1/N (\Sum n_i^2) - avg(n)^2) + * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + * + * (\Sum n_i^2) - ((\Sum n_i)^2)/n + * s^2 ------------------------------- + * n - 1 + * + * http://en.wikipedia.org/wiki/Stddev + * + * The std dev of the mean is related to the std dev by: + * + * s + * s_mean = ------- + * sqrt(n) + * */ static double stddev_stats(struct stats *stats) { double avg = stats->sum / run_count; + double variance = (stats->sum_sq - stats->sum*avg)/(run_count - 1); + double variance_mean = variance / run_count; - return sqrt(stats->sum_sq/run_count - avg*avg); + return sqrt(variance_mean); } struct stats event_res_stats[MAX_COUNTERS][3]; -struct stats event_scaled_stats[MAX_COUNTERS]; struct stats runtime_nsecs_stats; struct stats walltime_nsecs_stats; struct stats runtime_cycles_stats; @@ -343,11 +357,10 @@ static void abs_printout(int counter, double avg, double stddev) static void print_counter(int counter) { double avg, stddev; - int scaled; + int scaled = event_scaled[counter]; avg = avg_stats(&event_res_stats[counter][0]); stddev = stddev_stats(&event_res_stats[counter][0]); - scaled = avg_stats(&event_scaled_stats[counter]); if (scaled == -1) { fprintf(stderr, " %14s %-24s\n", From 8a02631a470d6f2ccec7bcf79c1058b0d4240bce Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 4 Sep 2009 17:26:26 +0200 Subject: [PATCH 58/60] perf stat: More advanced variance computation Use the more advanced single pass variance algorithm outlined on the wikipedia page. This is numerically more stable for larger sample sets. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e9424fa72420..32b5c003f7fc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -79,29 +79,30 @@ static int event_scaled[MAX_COUNTERS]; struct stats { - double sum; - double sum_sq; + double n, mean, M2; }; static void update_stats(struct stats *stats, u64 val) { - double sq = val; + double delta; - stats->sum += val; - stats->sum_sq += sq * sq; + stats->n++; + delta = val - stats->mean; + stats->mean += delta / stats->n; + stats->M2 += delta*(val - stats->mean); } static double avg_stats(struct stats *stats) { - return stats->sum / run_count; + return stats->mean; } /* * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance * - * (\Sum n_i^2) - ((\Sum n_i)^2)/n - * s^2 ------------------------------- - * n - 1 + * (\Sum n_i^2) - ((\Sum n_i)^2)/n + * s^2 = ------------------------------- + * n - 1 * * http://en.wikipedia.org/wiki/Stddev * @@ -114,9 +115,8 @@ static double avg_stats(struct stats *stats) */ static double stddev_stats(struct stats *stats) { - double avg = stats->sum / run_count; - double variance = (stats->sum_sq - stats->sum*avg)/(run_count - 1); - double variance_mean = variance / run_count; + double variance = stats->M2 / (stats->n - 1); + double variance_mean = variance / stats->n; return sqrt(variance_mean); } From 849abde92bd3314a4894f2b4f70b30c2accf8653 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 4 Sep 2009 18:23:38 +0200 Subject: [PATCH 59/60] perf stat: Clean up statistics calculations a bit more Remove some, now useless, global storage. Don't calculate the stddev when not needed. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 32b5c003f7fc..61b828236c11 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -74,7 +74,6 @@ static int null_run = 0; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static u64 event_res[MAX_COUNTERS][3]; static int event_scaled[MAX_COUNTERS]; struct stats @@ -179,14 +178,12 @@ static inline int nsec_counter(int counter) */ static void read_counter(int counter) { - u64 *count, single_count[3]; + u64 count[3], single_count[3]; unsigned int cpu; size_t res, nv; int scaled; int i; - count = event_res[counter]; - count[0] = count[1] = count[2] = 0; nv = scale ? 3 : 1; @@ -318,13 +315,16 @@ static int run_perf_stat(int argc __used, const char **argv) return WEXITSTATUS(status); } -static void print_noise(double avg, double stddev) +static void print_noise(int counter, double avg) { - if (run_count > 1) - fprintf(stderr, " ( +- %7.3f%% )", 100*stddev / avg); + if (run_count == 1) + return; + + fprintf(stderr, " ( +- %7.3f%% )", + 100 * stddev_stats(&event_res_stats[counter][0]) / avg); } -static void nsec_printout(int counter, double avg, double stddev) +static void nsec_printout(int counter, double avg) { double msecs = avg / 1e6; @@ -334,10 +334,9 @@ static void nsec_printout(int counter, double avg, double stddev) fprintf(stderr, " # %10.3f CPUs ", avg / avg_stats(&walltime_nsecs_stats)); } - print_noise(avg, stddev); } -static void abs_printout(int counter, double avg, double stddev) +static void abs_printout(int counter, double avg) { fprintf(stderr, " %14.0f %-24s", avg, event_name(counter)); @@ -348,7 +347,6 @@ static void abs_printout(int counter, double avg, double stddev) fprintf(stderr, " # %10.3f M/sec", 1000.0 * avg / avg_stats(&runtime_nsecs_stats)); } - print_noise(avg, stddev); } /* @@ -356,12 +354,9 @@ static void abs_printout(int counter, double avg, double stddev) */ static void print_counter(int counter) { - double avg, stddev; + double avg = avg_stats(&event_res_stats[counter][0]); int scaled = event_scaled[counter]; - avg = avg_stats(&event_res_stats[counter][0]); - stddev = stddev_stats(&event_res_stats[counter][0]); - if (scaled == -1) { fprintf(stderr, " %14s %-24s\n", "", event_name(counter)); @@ -369,9 +364,11 @@ static void print_counter(int counter) } if (nsec_counter(counter)) - nsec_printout(counter, avg, stddev); + nsec_printout(counter, avg); else - abs_printout(counter, avg, stddev); + abs_printout(counter, avg); + + print_noise(counter, avg); if (scaled) { double avg_enabled, avg_running; From 6b58e7f146f8d79c08f62087f928e1f01747de71 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 4 Sep 2009 16:39:51 -0300 Subject: [PATCH 60/60] perf tools: Avoid unnecessary work in directory lookups This patch improves some (common) inefficiencies in the handling of directory lookups: - not using the d_type information returned by the kernel - constructing (absolute) paths for file operation even though directory-relative operations using the *at functions is possible There are more places to fix but this is a start. Signed-off-by: Ulrich Drepper Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20090904193951.GB6186@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 64 +++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 89d46c99bc9c..52219d5a507b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,14 +1,12 @@ -#include "../perf.h" #include "util.h" +#include "../perf.h" #include "parse-options.h" #include "parse-events.h" #include "exec_cmd.h" #include "string.h" #include "cache.h" -extern char *strcasestr(const char *haystack, const char *needle); - int nr_counters; struct perf_counter_attr attrs[MAX_COUNTERS]; @@ -113,11 +111,9 @@ static unsigned long hw_cache_stat[C(MAX)] = { [C(BPU)] = (CACHE_READ), }; -#define for_each_subsystem(sys_dir, sys_dirent, sys_next, file, st) \ +#define for_each_subsystem(sys_dir, sys_dirent, sys_next) \ while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next) \ - if (snprintf(file, MAXPATHLEN, "%s/%s", debugfs_path, \ - sys_dirent.d_name) && \ - (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ + if (sys_dirent.d_type == DT_DIR && \ (strcmp(sys_dirent.d_name, ".")) && \ (strcmp(sys_dirent.d_name, ".."))) @@ -136,11 +132,9 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) return 0; } -#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ +#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) \ while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ - if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \ - sys_dirent.d_name, evt_dirent.d_name) && \ - (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ + if (evt_dirent.d_type == DT_DIR && \ (strcmp(evt_dirent.d_name, ".")) && \ (strcmp(evt_dirent.d_name, "..")) && \ (!tp_event_has_id(&sys_dirent, &evt_dirent))) @@ -163,9 +157,8 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) struct tracepoint_path *path = NULL; DIR *sys_dir, *evt_dir; struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; - struct stat st; char id_buf[4]; - int fd; + int sys_dir_fd, fd; u64 id; char evt_path[MAXPATHLEN]; @@ -175,17 +168,23 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) sys_dir = opendir(debugfs_path); if (!sys_dir) goto cleanup; + sys_dir_fd = dirfd(sys_dir); - for_each_subsystem(sys_dir, sys_dirent, sys_next, evt_path, st) { - evt_dir = opendir(evt_path); - if (!evt_dir) - goto cleanup; - for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, - evt_path, st) { - snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", - debugfs_path, sys_dirent.d_name, + for_each_subsystem(sys_dir, sys_dirent, sys_next) { + int dfd = openat(sys_dir_fd, sys_dirent.d_name, + O_RDONLY|O_DIRECTORY), evt_dir_fd; + if (dfd == -1) + continue; + evt_dir = fdopendir(dfd); + if (!evt_dir) { + close(dfd); + continue; + } + evt_dir_fd = dirfd(evt_dir); + for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { + snprintf(evt_path, MAXPATHLEN, "%s/id", evt_dirent.d_name); - fd = open(evt_path, O_RDONLY); + fd = openat(evt_dir_fd, evt_path, O_RDONLY); if (fd < 0) continue; if (read(fd, id_buf, sizeof(id_buf)) < 0) { @@ -629,7 +628,7 @@ static void print_tracepoint_events(void) { DIR *sys_dir, *evt_dir; struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; - struct stat st; + int sys_dir_fd; char evt_path[MAXPATHLEN]; if (valid_debugfs_mount(debugfs_path)) @@ -638,13 +637,20 @@ static void print_tracepoint_events(void) sys_dir = opendir(debugfs_path); if (!sys_dir) goto cleanup; + sys_dir_fd = dirfd(sys_dir); - for_each_subsystem(sys_dir, sys_dirent, sys_next, evt_path, st) { - evt_dir = opendir(evt_path); - if (!evt_dir) - goto cleanup; - for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, - evt_path, st) { + for_each_subsystem(sys_dir, sys_dirent, sys_next) { + int dfd = openat(sys_dir_fd, sys_dirent.d_name, + O_RDONLY|O_DIRECTORY), evt_dir_fd; + if (dfd == -1) + continue; + evt_dir = fdopendir(dfd); + if (!evt_dir) { + close(dfd); + continue; + } + evt_dir_fd = dirfd(evt_dir); + for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); fprintf(stderr, " %-40s [%s]\n", evt_path,