2008-12-03 17:39:53 +08:00
|
|
|
/*
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
* Performance events x86 architecture code
|
2008-12-03 17:39:53 +08:00
|
|
|
*
|
2009-04-29 20:52:50 +08:00
|
|
|
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
|
|
|
|
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
|
|
|
|
* Copyright (C) 2009 Jaswinder Singh Rajput
|
|
|
|
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
|
|
|
|
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
|
2009-07-21 21:56:48 +08:00
|
|
|
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
|
2010-01-18 16:58:01 +08:00
|
|
|
* Copyright (C) 2009 Google, Inc., Stephane Eranian
|
2008-12-03 17:39:53 +08:00
|
|
|
*
|
|
|
|
* For licencing details see kernel-base/COPYING
|
|
|
|
*/
|
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
#include <linux/perf_event.h>
|
2008-12-03 17:39:53 +08:00
|
|
|
#include <linux/capability.h>
|
|
|
|
#include <linux/notifier.h>
|
|
|
|
#include <linux/hardirq.h>
|
|
|
|
#include <linux/kprobes.h>
|
2008-12-10 04:43:39 +08:00
|
|
|
#include <linux/module.h>
|
2008-12-03 17:39:53 +08:00
|
|
|
#include <linux/kdebug.h>
|
|
|
|
#include <linux/sched.h>
|
2009-03-31 01:07:15 +08:00
|
|
|
#include <linux/uaccess.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the followings.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
wildly available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build test were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable easily on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
|
|
|
#include <linux/slab.h>
|
2009-06-15 19:07:24 +08:00
|
|
|
#include <linux/highmem.h>
|
2009-07-21 21:56:48 +08:00
|
|
|
#include <linux/cpu.h>
|
2010-01-22 23:32:17 +08:00
|
|
|
#include <linux/bitops.h>
|
2008-12-03 17:39:53 +08:00
|
|
|
|
|
|
|
#include <asm/apic.h>
|
2009-03-31 01:07:15 +08:00
|
|
|
#include <asm/stacktrace.h>
|
2009-03-31 01:07:16 +08:00
|
|
|
#include <asm/nmi.h>
|
2010-03-17 18:07:16 +08:00
|
|
|
#include <asm/compat.h>
|
2011-03-03 10:34:50 +08:00
|
|
|
#include <asm/smp.h>
|
2011-04-16 08:27:55 +08:00
|
|
|
#include <asm/alternative.h>
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2010-03-08 20:51:31 +08:00
|
|
|
#if 0
|
|
|
|
#undef wrmsrl
|
|
|
|
#define wrmsrl(msr, val) \
|
|
|
|
do { \
|
|
|
|
trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
|
|
|
|
(unsigned long)(val)); \
|
|
|
|
native_write_msr((msr), (u32)((u64)(val)), \
|
|
|
|
(u32)((u64)(val) >> 32)); \
|
|
|
|
} while (0)
|
|
|
|
#endif
|
|
|
|
|
2010-03-03 20:12:23 +08:00
|
|
|
/*
|
|
|
|
* best effort, GUP based copy_from_user() that assumes IRQ or NMI context
|
|
|
|
*/
|
|
|
|
static unsigned long
|
|
|
|
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
|
|
|
|
{
|
|
|
|
unsigned long offset, addr = (unsigned long)from;
|
|
|
|
unsigned long size, len = 0;
|
|
|
|
struct page *page;
|
|
|
|
void *map;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
do {
|
|
|
|
ret = __get_user_pages_fast(addr, 1, 0, &page);
|
|
|
|
if (!ret)
|
|
|
|
break;
|
|
|
|
|
|
|
|
offset = addr & (PAGE_SIZE - 1);
|
|
|
|
size = min(PAGE_SIZE - offset, n - len);
|
|
|
|
|
2010-10-27 05:21:53 +08:00
|
|
|
map = kmap_atomic(page);
|
2010-03-03 20:12:23 +08:00
|
|
|
memcpy(to, map+offset, size);
|
2010-10-27 05:21:53 +08:00
|
|
|
kunmap_atomic(map);
|
2010-03-03 20:12:23 +08:00
|
|
|
put_page(page);
|
|
|
|
|
|
|
|
len += size;
|
|
|
|
to += size;
|
|
|
|
addr += size;
|
|
|
|
|
|
|
|
} while (len < n);
|
|
|
|
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2010-01-18 16:58:01 +08:00
|
|
|
struct event_constraint {
|
2010-01-22 22:25:59 +08:00
|
|
|
union {
|
|
|
|
unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
2010-02-01 22:36:30 +08:00
|
|
|
u64 idxmsk64;
|
2010-01-22 22:25:59 +08:00
|
|
|
};
|
2010-02-01 22:36:30 +08:00
|
|
|
u64 code;
|
|
|
|
u64 cmask;
|
2010-01-22 23:32:17 +08:00
|
|
|
int weight;
|
2010-01-18 16:58:01 +08:00
|
|
|
};
|
|
|
|
|
2010-02-08 23:17:01 +08:00
|
|
|
struct amd_nb {
|
|
|
|
int nb_id; /* NorthBridge id */
|
|
|
|
int refcnt; /* reference count */
|
|
|
|
struct perf_event *owners[X86_PMC_IDX_MAX];
|
|
|
|
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
|
|
|
|
};
|
|
|
|
|
2011-03-03 10:34:47 +08:00
|
|
|
struct intel_percore;
|
|
|
|
|
2010-03-03 19:02:30 +08:00
|
|
|
#define MAX_LBR_ENTRIES 16
|
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
struct cpu_hw_events {
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
/*
|
|
|
|
* Generic x86 PMC bits
|
|
|
|
*/
|
2010-01-18 16:58:01 +08:00
|
|
|
struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
|
2009-04-29 22:55:56 +08:00
|
|
|
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
2010-09-16 00:20:34 +08:00
|
|
|
unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
2009-03-06 01:08:27 +08:00
|
|
|
int enabled;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2010-01-18 16:58:01 +08:00
|
|
|
int n_events;
|
|
|
|
int n_added;
|
perf_events: Fix event scheduling issues introduced by transactional API
The transactional API patch between the generic and model-specific
code introduced several important bugs with event scheduling, at
least on X86. If you had pinned events, e.g., watchdog, and were
over-committing the PMU, you would get bogus counts. The bug was
showing up on Intel CPU because events would move around more
often that on AMD. But the problem also existed on AMD, though
harder to expose.
The issues were:
- group_sched_in() was missing a cancel_txn() in the error path
- cpuc->n_added was not properly maintained, leading to missing
actions in hw_perf_enable(), i.e., n_running being 0. You cannot
update n_added until you know the transaction has succeeded. In
case of failed transaction n_added was not adjusted back.
- in case of failed transactions, event_sched_out() was called
and eventually invoked x86_disable_event() to touch the HW reg.
But with transactions, on X86, event_sched_in() does not touch
HW registers, it simply collects events into a list. Thus, you
could end up calling x86_disable_event() on a counter which
did not correspond to the current event when idx != -1.
The patch modifies the generic and X86 code to avoid all those problems.
First, we keep track of the number of events added last. In case the
transaction fails, we substract them from n_added. This approach is
necessary (as opposed to delaying updates to n_added) because not all
event updates use the transaction API, e.g., single events.
Second, we encapsulate the event_sched_in() and event_sched_out() in
group_sched_in() inside the transaction. That makes the operations
symmetrical and you can also detect that you are inside a transaction
and skip the HW reg access by checking cpuc->group_flag.
With this patch, you can now overcommit the PMU even with pinned
system-wide events present and still get valid counts.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1274796225.5882.1389.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-05-25 22:23:10 +08:00
|
|
|
int n_txn;
|
2010-01-18 16:58:01 +08:00
|
|
|
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
|
2010-02-01 20:50:01 +08:00
|
|
|
u64 tags[X86_PMC_IDX_MAX];
|
2010-01-18 16:58:01 +08:00
|
|
|
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
|
2010-04-23 13:56:12 +08:00
|
|
|
unsigned int group_flag;
|
|
|
|
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
/*
|
|
|
|
* Intel DebugStore bits
|
|
|
|
*/
|
|
|
|
struct debug_store *ds;
|
|
|
|
u64 pebs_enabled;
|
|
|
|
|
2010-03-03 19:02:30 +08:00
|
|
|
/*
|
|
|
|
* Intel LBR bits
|
|
|
|
*/
|
|
|
|
int lbr_users;
|
|
|
|
void *lbr_context;
|
|
|
|
struct perf_branch_stack lbr_stack;
|
|
|
|
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
|
|
|
|
|
2011-03-03 10:34:47 +08:00
|
|
|
/*
|
|
|
|
* Intel percore register state.
|
|
|
|
* Coordinate shared resources between HT threads.
|
|
|
|
*/
|
|
|
|
int percore_used; /* Used by this CPU? */
|
|
|
|
struct intel_percore *per_core;
|
|
|
|
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
/*
|
|
|
|
* AMD specific bits
|
|
|
|
*/
|
2010-02-08 23:17:01 +08:00
|
|
|
struct amd_nb *amd_nb;
|
2009-10-06 22:42:09 +08:00
|
|
|
};
|
|
|
|
|
2010-01-29 20:25:12 +08:00
|
|
|
#define __EVENT_CONSTRAINT(c, n, m, w) {\
|
2010-02-01 22:36:30 +08:00
|
|
|
{ .idxmsk64 = (n) }, \
|
2010-01-22 22:25:59 +08:00
|
|
|
.code = (c), \
|
|
|
|
.cmask = (m), \
|
2010-01-29 20:25:12 +08:00
|
|
|
.weight = (w), \
|
2010-01-22 22:25:59 +08:00
|
|
|
}
|
2009-10-06 22:42:09 +08:00
|
|
|
|
2010-01-29 20:25:12 +08:00
|
|
|
#define EVENT_CONSTRAINT(c, n, m) \
|
|
|
|
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
|
|
|
|
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
/*
|
|
|
|
* Constraint on the Event code.
|
|
|
|
*/
|
2010-01-28 06:07:46 +08:00
|
|
|
#define INTEL_EVENT_CONSTRAINT(c, n) \
|
2010-03-30 17:28:21 +08:00
|
|
|
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
|
2010-01-22 22:38:26 +08:00
|
|
|
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
/*
|
|
|
|
* Constraint on the Event code + UMask + fixed-mask
|
2010-03-30 17:28:21 +08:00
|
|
|
*
|
|
|
|
* filter mask to validate fixed counter events.
|
|
|
|
* the following filters disqualify for fixed counters:
|
|
|
|
* - inv
|
|
|
|
* - edge
|
|
|
|
* - cnt-mask
|
|
|
|
* The other filters are supported by fixed counters.
|
|
|
|
* The any-thread option is supported starting with v3.
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
*/
|
2010-01-28 06:07:46 +08:00
|
|
|
#define FIXED_EVENT_CONSTRAINT(c, n) \
|
2010-03-30 17:28:21 +08:00
|
|
|
EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
|
2010-01-22 22:38:26 +08:00
|
|
|
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
/*
|
|
|
|
* Constraint on the Event code + UMask
|
|
|
|
*/
|
2011-03-02 21:27:04 +08:00
|
|
|
#define INTEL_UEVENT_CONSTRAINT(c, n) \
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
|
|
|
|
|
2010-01-28 06:07:46 +08:00
|
|
|
#define EVENT_CONSTRAINT_END \
|
|
|
|
EVENT_CONSTRAINT(0, 0, 0)
|
|
|
|
|
|
|
|
#define for_each_event_constraint(e, c) \
|
2010-04-14 04:23:15 +08:00
|
|
|
for ((e) = (c); (e)->weight; (e)++)
|
2009-10-06 22:42:09 +08:00
|
|
|
|
2011-03-03 10:34:47 +08:00
|
|
|
/*
|
|
|
|
* Extra registers for specific events.
|
|
|
|
* Some events need large masks and require external MSRs.
|
|
|
|
* Define a mapping to these extra registers.
|
|
|
|
*/
|
|
|
|
struct extra_reg {
|
|
|
|
unsigned int event;
|
|
|
|
unsigned int msr;
|
|
|
|
u64 config_mask;
|
|
|
|
u64 valid_mask;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define EVENT_EXTRA_REG(e, ms, m, vm) { \
|
|
|
|
.event = (e), \
|
|
|
|
.msr = (ms), \
|
|
|
|
.config_mask = (m), \
|
|
|
|
.valid_mask = (vm), \
|
|
|
|
}
|
|
|
|
#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
|
|
|
|
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
|
|
|
|
#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
|
|
|
|
|
2010-03-04 00:07:40 +08:00
|
|
|
union perf_capabilities {
|
|
|
|
struct {
|
|
|
|
u64 lbr_format : 6;
|
|
|
|
u64 pebs_trap : 1;
|
|
|
|
u64 pebs_arch_reg : 1;
|
|
|
|
u64 pebs_format : 4;
|
|
|
|
u64 smm_freeze : 1;
|
|
|
|
};
|
|
|
|
u64 capabilities;
|
|
|
|
};
|
|
|
|
|
2008-12-03 17:39:53 +08:00
|
|
|
/*
|
2009-04-29 18:47:04 +08:00
|
|
|
* struct x86_pmu - generic x86 pmu
|
2008-12-03 17:39:53 +08:00
|
|
|
*/
|
2009-04-29 18:47:04 +08:00
|
|
|
struct x86_pmu {
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
/*
|
|
|
|
* Generic x86 PMC bits
|
|
|
|
*/
|
2009-04-29 18:47:13 +08:00
|
|
|
const char *name;
|
|
|
|
int version;
|
2009-06-03 13:12:55 +08:00
|
|
|
int (*handle_irq)(struct pt_regs *);
|
2009-05-13 22:21:38 +08:00
|
|
|
void (*disable_all)(void);
|
2010-03-26 21:08:44 +08:00
|
|
|
void (*enable_all)(int added);
|
2010-03-03 03:32:08 +08:00
|
|
|
void (*enable)(struct perf_event *);
|
|
|
|
void (*disable)(struct perf_event *);
|
2011-06-23 20:49:18 +08:00
|
|
|
void (*hw_watchdog_set_attr)(struct perf_event_attr *attr);
|
2010-03-30 23:00:06 +08:00
|
|
|
int (*hw_config)(struct perf_event *event);
|
2010-03-12 00:54:39 +08:00
|
|
|
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
|
2009-02-28 21:07:49 +08:00
|
|
|
unsigned eventsel;
|
|
|
|
unsigned perfctr;
|
2009-03-06 01:08:27 +08:00
|
|
|
u64 (*event_map)(int);
|
2009-02-28 21:07:49 +08:00
|
|
|
int max_events;
|
2010-03-30 00:36:50 +08:00
|
|
|
int num_counters;
|
|
|
|
int num_counters_fixed;
|
|
|
|
int cntval_bits;
|
|
|
|
u64 cntval_mask;
|
2009-08-11 16:40:08 +08:00
|
|
|
int apic;
|
2009-04-29 18:47:23 +08:00
|
|
|
u64 max_period;
|
2010-01-22 23:32:17 +08:00
|
|
|
struct event_constraint *
|
|
|
|
(*get_event_constraints)(struct cpu_hw_events *cpuc,
|
|
|
|
struct perf_event *event);
|
|
|
|
|
2010-01-22 22:25:59 +08:00
|
|
|
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
|
|
|
|
struct perf_event *event);
|
2010-01-22 23:32:17 +08:00
|
|
|
struct event_constraint *event_constraints;
|
2011-03-03 10:34:47 +08:00
|
|
|
struct event_constraint *percore_constraints;
|
2010-03-05 04:49:01 +08:00
|
|
|
void (*quirks)(void);
|
2010-06-03 05:23:04 +08:00
|
|
|
int perfctr_second_write;
|
2010-03-05 20:01:18 +08:00
|
|
|
|
2010-03-24 02:31:15 +08:00
|
|
|
int (*cpu_prepare)(int cpu);
|
2010-03-05 20:01:18 +08:00
|
|
|
void (*cpu_starting)(int cpu);
|
|
|
|
void (*cpu_dying)(int cpu);
|
|
|
|
void (*cpu_dead)(int cpu);
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Intel Arch Perfmon v2+
|
|
|
|
*/
|
2010-03-04 00:07:40 +08:00
|
|
|
u64 intel_ctrl;
|
|
|
|
union perf_capabilities intel_cap;
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Intel DebugStore bits
|
|
|
|
*/
|
|
|
|
int bts, pebs;
|
2010-10-19 20:22:50 +08:00
|
|
|
int bts_active, pebs_active;
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
int pebs_record_size;
|
|
|
|
void (*drain_pebs)(struct pt_regs *regs);
|
|
|
|
struct event_constraint *pebs_constraints;
|
2010-03-03 19:02:30 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Intel LBR
|
|
|
|
*/
|
|
|
|
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
|
|
|
|
int lbr_nr; /* hardware stack size */
|
2011-03-03 10:34:47 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Extra registers for events
|
|
|
|
*/
|
|
|
|
struct extra_reg *extra_regs;
|
2009-02-27 20:39:09 +08:00
|
|
|
};
|
|
|
|
|
2009-04-29 18:47:11 +08:00
|
|
|
static struct x86_pmu x86_pmu __read_mostly;
|
2009-02-27 20:39:09 +08:00
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
|
2009-03-06 01:08:27 +08:00
|
|
|
.enabled = 1,
|
|
|
|
};
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2010-03-03 03:16:01 +08:00
|
|
|
static int x86_perf_event_set_period(struct perf_event *event);
|
2009-10-06 22:42:09 +08:00
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
/*
|
2009-09-21 17:31:35 +08:00
|
|
|
* Generalized hw caching related hw_event table, filled
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
* in on a per model basis. A value of 0 means
|
2009-09-21 17:31:35 +08:00
|
|
|
* 'not supported', -1 means 'hw_event makes no sense on
|
|
|
|
* this CPU', any other value means the raw hw_event
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
* ID.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define C(x) PERF_COUNT_HW_CACHE_##x
|
|
|
|
|
|
|
|
static u64 __read_mostly hw_cache_event_ids
|
|
|
|
[PERF_COUNT_HW_CACHE_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_RESULT_MAX];
|
2011-03-03 10:34:48 +08:00
|
|
|
static u64 __read_mostly hw_cache_extra_regs
|
|
|
|
[PERF_COUNT_HW_CACHE_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_OP_MAX]
|
|
|
|
[PERF_COUNT_HW_CACHE_RESULT_MAX];
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
|
2011-06-23 20:49:18 +08:00
|
|
|
void hw_nmi_watchdog_set_attr(struct perf_event_attr *wd_attr)
|
|
|
|
{
|
|
|
|
if (x86_pmu.hw_watchdog_set_attr)
|
|
|
|
x86_pmu.hw_watchdog_set_attr(wd_attr);
|
|
|
|
}
|
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
/*
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
* Propagate event elapsed time into the generic event.
|
|
|
|
* Can only be executed on the CPU where the event is active.
|
2008-12-13 16:00:03 +08:00
|
|
|
* Returns the delta events processed.
|
|
|
|
*/
|
2009-04-29 18:47:22 +08:00
|
|
|
static u64
|
2010-03-03 03:18:39 +08:00
|
|
|
x86_perf_event_update(struct perf_event *event)
|
2008-12-13 16:00:03 +08:00
|
|
|
{
|
2010-03-03 03:18:39 +08:00
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
2010-03-30 00:36:50 +08:00
|
|
|
int shift = 64 - x86_pmu.cntval_bits;
|
2009-05-13 15:45:19 +08:00
|
|
|
u64 prev_raw_count, new_raw_count;
|
2010-03-03 03:18:39 +08:00
|
|
|
int idx = hwc->idx;
|
2009-05-13 15:45:19 +08:00
|
|
|
s64 delta;
|
2008-12-13 16:00:03 +08:00
|
|
|
|
2009-07-21 21:56:48 +08:00
|
|
|
if (idx == X86_PMC_IDX_FIXED_BTS)
|
|
|
|
return 0;
|
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
/*
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
* Careful: an NMI might modify the previous event value.
|
2008-12-13 16:00:03 +08:00
|
|
|
*
|
|
|
|
* Our tactic to handle this is to first atomically read and
|
|
|
|
* exchange a new raw count - then add that new-prev delta
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
* count to the generic event atomically:
|
2008-12-13 16:00:03 +08:00
|
|
|
*/
|
|
|
|
again:
|
2010-05-21 20:43:08 +08:00
|
|
|
prev_raw_count = local64_read(&hwc->prev_count);
|
2011-02-03 00:40:59 +08:00
|
|
|
rdmsrl(hwc->event_base, new_raw_count);
|
2008-12-13 16:00:03 +08:00
|
|
|
|
2010-05-21 20:43:08 +08:00
|
|
|
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
2008-12-13 16:00:03 +08:00
|
|
|
new_raw_count) != prev_raw_count)
|
|
|
|
goto again;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now we have the new raw value and have updated the prev
|
|
|
|
* timestamp already. We can now calculate the elapsed delta
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
* (event-)time and add that to the generic event.
|
2008-12-13 16:00:03 +08:00
|
|
|
*
|
|
|
|
* Careful, not all hw sign-extends above the physical width
|
2009-05-13 15:45:19 +08:00
|
|
|
* of the count.
|
2008-12-13 16:00:03 +08:00
|
|
|
*/
|
2009-05-13 15:45:19 +08:00
|
|
|
delta = (new_raw_count << shift) - (prev_raw_count << shift);
|
|
|
|
delta >>= shift;
|
2008-12-13 16:00:03 +08:00
|
|
|
|
2010-05-21 20:43:08 +08:00
|
|
|
local64_add(delta, &event->count);
|
|
|
|
local64_sub(delta, &hwc->period_left);
|
2009-04-29 18:47:22 +08:00
|
|
|
|
|
|
|
return new_raw_count;
|
2008-12-13 16:00:03 +08:00
|
|
|
}
|
|
|
|
|
2011-02-03 00:36:12 +08:00
|
|
|
static inline int x86_pmu_addr_offset(int index)
|
|
|
|
{
|
2011-04-16 08:27:55 +08:00
|
|
|
int offset;
|
|
|
|
|
|
|
|
/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
|
|
|
|
alternative_io(ASM_NOP2,
|
|
|
|
"shll $1, %%eax",
|
|
|
|
X86_FEATURE_PERFCTR_CORE,
|
|
|
|
"=a" (offset),
|
|
|
|
"a" (index));
|
|
|
|
|
|
|
|
return offset;
|
2011-02-03 00:36:12 +08:00
|
|
|
}
|
|
|
|
|
2011-02-03 00:40:57 +08:00
|
|
|
static inline unsigned int x86_pmu_config_addr(int index)
|
|
|
|
{
|
2011-02-03 00:36:12 +08:00
|
|
|
return x86_pmu.eventsel + x86_pmu_addr_offset(index);
|
2011-02-03 00:40:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned int x86_pmu_event_addr(int index)
|
|
|
|
{
|
2011-02-03 00:36:12 +08:00
|
|
|
return x86_pmu.perfctr + x86_pmu_addr_offset(index);
|
2011-02-03 00:40:57 +08:00
|
|
|
}
|
|
|
|
|
2011-03-03 10:34:47 +08:00
|
|
|
/*
|
|
|
|
* Find and validate any extra registers to set up.
|
|
|
|
*/
|
|
|
|
static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
|
|
|
|
{
|
|
|
|
struct extra_reg *er;
|
|
|
|
|
|
|
|
event->hw.extra_reg = 0;
|
|
|
|
event->hw.extra_config = 0;
|
|
|
|
|
|
|
|
if (!x86_pmu.extra_regs)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
for (er = x86_pmu.extra_regs; er->msr; er++) {
|
|
|
|
if (er->event != (config & er->config_mask))
|
|
|
|
continue;
|
|
|
|
if (event->attr.config1 & ~er->valid_mask)
|
|
|
|
return -EINVAL;
|
|
|
|
event->hw.extra_reg = er->msr;
|
|
|
|
event->hw.extra_config = event->attr.config1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
static atomic_t active_events;
|
2009-03-31 01:07:16 +08:00
|
|
|
static DEFINE_MUTEX(pmc_reserve_mutex);
|
|
|
|
|
2010-03-17 19:49:10 +08:00
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
|
|
|
2009-03-31 01:07:16 +08:00
|
|
|
static bool reserve_pmc_hardware(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
for (i = 0; i < x86_pmu.num_counters; i++) {
|
2011-02-03 00:40:57 +08:00
|
|
|
if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
|
2009-03-31 01:07:16 +08:00
|
|
|
goto perfctr_fail;
|
|
|
|
}
|
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
for (i = 0; i < x86_pmu.num_counters; i++) {
|
2011-02-03 00:40:57 +08:00
|
|
|
if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
|
2009-03-31 01:07:16 +08:00
|
|
|
goto eventsel_fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
eventsel_fail:
|
|
|
|
for (i--; i >= 0; i--)
|
2011-02-03 00:40:57 +08:00
|
|
|
release_evntsel_nmi(x86_pmu_config_addr(i));
|
2009-03-31 01:07:16 +08:00
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
i = x86_pmu.num_counters;
|
2009-03-31 01:07:16 +08:00
|
|
|
|
|
|
|
perfctr_fail:
|
|
|
|
for (i--; i >= 0; i--)
|
2011-02-03 00:40:57 +08:00
|
|
|
release_perfctr_nmi(x86_pmu_event_addr(i));
|
2009-03-31 01:07:16 +08:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void release_pmc_hardware(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
for (i = 0; i < x86_pmu.num_counters; i++) {
|
2011-02-03 00:40:57 +08:00
|
|
|
release_perfctr_nmi(x86_pmu_event_addr(i));
|
|
|
|
release_evntsel_nmi(x86_pmu_config_addr(i));
|
2009-03-31 01:07:16 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-03-17 19:49:10 +08:00
|
|
|
#else
|
|
|
|
|
|
|
|
static bool reserve_pmc_hardware(void) { return true; }
|
|
|
|
static void release_pmc_hardware(void) {}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2010-11-23 05:55:23 +08:00
|
|
|
static bool check_hw_exists(void)
|
|
|
|
{
|
|
|
|
u64 val, val_new = 0;
|
2010-12-08 22:56:23 +08:00
|
|
|
int i, reg, ret = 0;
|
2010-11-23 05:55:23 +08:00
|
|
|
|
2010-12-08 22:56:23 +08:00
|
|
|
/*
|
|
|
|
* Check to see if the BIOS enabled any of the counters, if so
|
|
|
|
* complain and bail.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < x86_pmu.num_counters; i++) {
|
2011-02-03 00:40:57 +08:00
|
|
|
reg = x86_pmu_config_addr(i);
|
2010-12-08 22:56:23 +08:00
|
|
|
ret = rdmsrl_safe(reg, &val);
|
|
|
|
if (ret)
|
|
|
|
goto msr_fail;
|
|
|
|
if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
|
|
|
|
goto bios_fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (x86_pmu.num_counters_fixed) {
|
|
|
|
reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
|
|
|
|
ret = rdmsrl_safe(reg, &val);
|
|
|
|
if (ret)
|
|
|
|
goto msr_fail;
|
|
|
|
for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
|
|
|
|
if (val & (0x03 << i*4))
|
|
|
|
goto bios_fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now write a value and read it back to see if it matches,
|
|
|
|
* this is needed to detect certain hardware emulators (qemu/kvm)
|
|
|
|
* that don't trap on the MSR access and always return 0s.
|
|
|
|
*/
|
2010-11-23 05:55:23 +08:00
|
|
|
val = 0xabcdUL;
|
2011-02-03 00:40:57 +08:00
|
|
|
ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
|
|
|
|
ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
|
2010-11-23 05:55:23 +08:00
|
|
|
if (ret || val != val_new)
|
2010-12-08 22:56:23 +08:00
|
|
|
goto msr_fail;
|
2010-11-23 05:55:23 +08:00
|
|
|
|
|
|
|
return true;
|
2010-12-08 22:56:23 +08:00
|
|
|
|
|
|
|
bios_fail:
|
2011-03-25 17:24:23 +08:00
|
|
|
/*
|
|
|
|
* We still allow the PMU driver to operate:
|
|
|
|
*/
|
|
|
|
printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
|
2010-12-08 22:56:23 +08:00
|
|
|
printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
|
2011-03-25 17:24:23 +08:00
|
|
|
|
|
|
|
return true;
|
2010-12-08 22:56:23 +08:00
|
|
|
|
|
|
|
msr_fail:
|
|
|
|
printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
|
2011-03-25 17:24:23 +08:00
|
|
|
|
2010-12-08 22:56:23 +08:00
|
|
|
return false;
|
2010-11-23 05:55:23 +08:00
|
|
|
}
|
|
|
|
|
2010-10-19 20:50:02 +08:00
|
|
|
static void reserve_ds_buffers(void);
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
static void release_ds_buffers(void);
|
2009-07-21 21:56:48 +08:00
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
static void hw_perf_event_destroy(struct perf_event *event)
|
2009-03-31 01:07:16 +08:00
|
|
|
{
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
|
2009-03-31 01:07:16 +08:00
|
|
|
release_pmc_hardware();
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
release_ds_buffers();
|
2009-03-31 01:07:16 +08:00
|
|
|
mutex_unlock(&pmc_reserve_mutex);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-04-29 18:47:20 +08:00
|
|
|
static inline int x86_pmu_initialized(void)
|
|
|
|
{
|
|
|
|
return x86_pmu.handle_irq != NULL;
|
|
|
|
}
|
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
static inline int
|
2011-03-03 10:34:48 +08:00
|
|
|
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
{
|
2011-03-03 10:34:48 +08:00
|
|
|
struct perf_event_attr *attr = &event->attr;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
unsigned int cache_type, cache_op, cache_result;
|
|
|
|
u64 config, val;
|
|
|
|
|
|
|
|
config = attr->config;
|
|
|
|
|
|
|
|
cache_type = (config >> 0) & 0xff;
|
|
|
|
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
cache_op = (config >> 8) & 0xff;
|
|
|
|
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
cache_result = (config >> 16) & 0xff;
|
|
|
|
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
val = hw_cache_event_ids[cache_type][cache_op][cache_result];
|
|
|
|
|
|
|
|
if (val == 0)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
if (val == -1)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
hwc->config |= val;
|
2011-03-03 10:34:48 +08:00
|
|
|
attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
|
|
|
|
return x86_pmu_extra_regs(val, event);
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-06 02:22:46 +08:00
|
|
|
}
|
|
|
|
|
2010-04-14 04:23:11 +08:00
|
|
|
static int x86_setup_perfctr(struct perf_event *event)
|
|
|
|
{
|
|
|
|
struct perf_event_attr *attr = &event->attr;
|
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
u64 config;
|
|
|
|
|
2010-11-23 23:21:43 +08:00
|
|
|
if (!is_sampling_event(event)) {
|
2010-04-14 04:23:11 +08:00
|
|
|
hwc->sample_period = x86_pmu.max_period;
|
|
|
|
hwc->last_period = hwc->sample_period;
|
2010-05-21 20:43:08 +08:00
|
|
|
local64_set(&hwc->period_left, hwc->sample_period);
|
2010-04-14 04:23:11 +08:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* If we have a PMU initialized but no APIC
|
|
|
|
* interrupts, we cannot sample hardware
|
|
|
|
* events (user-space has to fall back and
|
|
|
|
* sample via a hrtimer based software event):
|
|
|
|
*/
|
|
|
|
if (!x86_pmu.apic)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
x86, perf event: Turn off unstructured raw event access to offcore registers
Andi Kleen pointed out that the Intel offcore support patches were merged
without user-space tool support to the functionality:
|
| The offcore_msr perf kernel code was merged into 2.6.39-rc*, but the
| user space bits were not. This made it impossible to set the extra mask
| and actually do the OFFCORE profiling
|
Andi submitted a preliminary patch for user-space support, as an
extension to perf's raw event syntax:
|
| Some raw events -- like the Intel OFFCORE events -- support additional
| parameters. These can be appended after a ':'.
|
| For example on a multi socket Intel Nehalem:
|
| perf stat -e r1b7:20ff -a sleep 1
|
| Profile the OFFCORE_RESPONSE.ANY_REQUEST with event mask REMOTE_DRAM_0
| that measures any access to DRAM on another socket.
|
But this kind of usability is absolutely unacceptable - users should not
be expected to type in magic, CPU and model specific incantations to get
access to useful hardware functionality.
The proper solution is to expose useful offcore functionality via
generalized events - that way users do not have to care which specific
CPU model they are using, they can use the conceptual event and not some
model specific quirky hexa number.
We already have such generalization in place for CPU cache events,
and it's all very extensible.
"Offcore" events measure general DRAM access patters along various
parameters. They are particularly useful in NUMA systems.
We want to support them via generalized DRAM events: either as the
fourth level of cache (after the last-level cache), or as a separate
generalization category.
That way user-space support would be very obvious, memory access
profiling could be done via self-explanatory commands like:
perf record -e dram ./myapp
perf record -e dram-remote ./myapp
... to measure DRAM accesses or more expensive cross-node NUMA DRAM
accesses.
These generalized events would work on all CPUs and architectures that
have comparable PMU features.
( Note, these are just examples: actual implementation could have more
sophistication and more parameter - as long as they center around
similarly simple usecases. )
Now we do not want to revert *all* of the current offcore bits, as they
are still somewhat useful for generic last-level-cache events, implemented
in this commit:
e994d7d23a0b: perf: Fix LLC-* events on Intel Nehalem/Westmere
But we definitely do not yet want to expose the unstructured raw events
to user-space, until better generalization and usability is implemented
for these hardware event features.
( Note: after generalization has been implemented raw offcore events can be
supported as well: there can always be an odd event that is marginally
useful but not useful enough to generalize. DRAM profiling is definitely
*not* such a category so generalization must be done first. )
Furthermore, PERF_TYPE_RAW access to these registers was not intended
to go upstream without proper support - it was a side-effect of the above
e994d7d23a0b commit, not mentioned in the changelog.
As v2.6.39 is nearing release we go for the simplest approach: disable
the PERF_TYPE_RAW offcore hack for now, before it escapes into a released
kernel and becomes an ABI.
Once proper structure is implemented for these hardware events and users
are offered usable solutions we can revisit this issue.
Reported-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1302658203-4239-1-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-22 14:44:38 +08:00
|
|
|
/*
|
|
|
|
* Do not allow config1 (extended registers) to propagate,
|
|
|
|
* there's no sane user-space generalization yet:
|
|
|
|
*/
|
2010-04-14 04:23:11 +08:00
|
|
|
if (attr->type == PERF_TYPE_RAW)
|
x86, perf event: Turn off unstructured raw event access to offcore registers
Andi Kleen pointed out that the Intel offcore support patches were merged
without user-space tool support to the functionality:
|
| The offcore_msr perf kernel code was merged into 2.6.39-rc*, but the
| user space bits were not. This made it impossible to set the extra mask
| and actually do the OFFCORE profiling
|
Andi submitted a preliminary patch for user-space support, as an
extension to perf's raw event syntax:
|
| Some raw events -- like the Intel OFFCORE events -- support additional
| parameters. These can be appended after a ':'.
|
| For example on a multi socket Intel Nehalem:
|
| perf stat -e r1b7:20ff -a sleep 1
|
| Profile the OFFCORE_RESPONSE.ANY_REQUEST with event mask REMOTE_DRAM_0
| that measures any access to DRAM on another socket.
|
But this kind of usability is absolutely unacceptable - users should not
be expected to type in magic, CPU and model specific incantations to get
access to useful hardware functionality.
The proper solution is to expose useful offcore functionality via
generalized events - that way users do not have to care which specific
CPU model they are using, they can use the conceptual event and not some
model specific quirky hexa number.
We already have such generalization in place for CPU cache events,
and it's all very extensible.
"Offcore" events measure general DRAM access patters along various
parameters. They are particularly useful in NUMA systems.
We want to support them via generalized DRAM events: either as the
fourth level of cache (after the last-level cache), or as a separate
generalization category.
That way user-space support would be very obvious, memory access
profiling could be done via self-explanatory commands like:
perf record -e dram ./myapp
perf record -e dram-remote ./myapp
... to measure DRAM accesses or more expensive cross-node NUMA DRAM
accesses.
These generalized events would work on all CPUs and architectures that
have comparable PMU features.
( Note, these are just examples: actual implementation could have more
sophistication and more parameter - as long as they center around
similarly simple usecases. )
Now we do not want to revert *all* of the current offcore bits, as they
are still somewhat useful for generic last-level-cache events, implemented
in this commit:
e994d7d23a0b: perf: Fix LLC-* events on Intel Nehalem/Westmere
But we definitely do not yet want to expose the unstructured raw events
to user-space, until better generalization and usability is implemented
for these hardware event features.
( Note: after generalization has been implemented raw offcore events can be
supported as well: there can always be an odd event that is marginally
useful but not useful enough to generalize. DRAM profiling is definitely
*not* such a category so generalization must be done first. )
Furthermore, PERF_TYPE_RAW access to these registers was not intended
to go upstream without proper support - it was a side-effect of the above
e994d7d23a0b commit, not mentioned in the changelog.
As v2.6.39 is nearing release we go for the simplest approach: disable
the PERF_TYPE_RAW offcore hack for now, before it escapes into a released
kernel and becomes an ABI.
Once proper structure is implemented for these hardware events and users
are offered usable solutions we can revisit this issue.
Reported-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1302658203-4239-1-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-04-22 14:44:38 +08:00
|
|
|
return 0;
|
2010-04-14 04:23:11 +08:00
|
|
|
|
|
|
|
if (attr->type == PERF_TYPE_HW_CACHE)
|
2011-03-03 10:34:48 +08:00
|
|
|
return set_ext_hw_attr(hwc, event);
|
2010-04-14 04:23:11 +08:00
|
|
|
|
|
|
|
if (attr->config >= x86_pmu.max_events)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The generic map:
|
|
|
|
*/
|
|
|
|
config = x86_pmu.event_map(attr->config);
|
|
|
|
|
|
|
|
if (config == 0)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
if (config == -1LL)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Branch tracing:
|
|
|
|
*/
|
2011-04-26 19:24:33 +08:00
|
|
|
if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
|
|
|
|
!attr->freq && hwc->sample_period == 1) {
|
2010-04-14 04:23:11 +08:00
|
|
|
/* BTS is not supported by this architecture. */
|
2010-10-19 20:22:50 +08:00
|
|
|
if (!x86_pmu.bts_active)
|
2010-04-14 04:23:11 +08:00
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
/* BTS is currently only allowed for user-mode. */
|
|
|
|
if (!attr->exclude_kernel)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
hwc->config |= config;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2010-04-14 04:23:10 +08:00
|
|
|
|
2010-03-30 23:00:06 +08:00
|
|
|
static int x86_pmu_hw_config(struct perf_event *event)
|
2010-03-12 00:54:39 +08:00
|
|
|
{
|
2010-04-09 05:03:20 +08:00
|
|
|
if (event->attr.precise_ip) {
|
|
|
|
int precise = 0;
|
|
|
|
|
|
|
|
/* Support for constant skid */
|
2010-10-19 20:22:50 +08:00
|
|
|
if (x86_pmu.pebs_active) {
|
2010-04-09 05:03:20 +08:00
|
|
|
precise++;
|
|
|
|
|
2010-10-19 20:38:11 +08:00
|
|
|
/* Support for IP fixup */
|
|
|
|
if (x86_pmu.lbr_nr)
|
|
|
|
precise++;
|
|
|
|
}
|
2010-04-09 05:03:20 +08:00
|
|
|
|
|
|
|
if (event->attr.precise_ip > precise)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
2010-03-12 00:54:39 +08:00
|
|
|
/*
|
|
|
|
* Generate PMC IRQs:
|
|
|
|
* (keep 'enabled' bit clear for now)
|
|
|
|
*/
|
2010-03-30 23:00:06 +08:00
|
|
|
event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
|
2010-03-12 00:54:39 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Count user and OS events unless requested not to
|
|
|
|
*/
|
2010-03-30 23:00:06 +08:00
|
|
|
if (!event->attr.exclude_user)
|
|
|
|
event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
|
|
|
|
if (!event->attr.exclude_kernel)
|
|
|
|
event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
|
2010-03-12 00:54:39 +08:00
|
|
|
|
2010-03-30 23:00:06 +08:00
|
|
|
if (event->attr.type == PERF_TYPE_RAW)
|
|
|
|
event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
|
2010-03-12 00:54:39 +08:00
|
|
|
|
2010-04-14 04:23:12 +08:00
|
|
|
return x86_setup_perfctr(event);
|
2010-03-30 17:28:21 +08:00
|
|
|
}
|
|
|
|
|
2008-12-03 17:39:53 +08:00
|
|
|
/*
|
2009-06-03 01:22:16 +08:00
|
|
|
* Setup the hardware configuration for a given attr_type
|
2008-12-03 17:39:53 +08:00
|
|
|
*/
|
2010-06-11 19:35:08 +08:00
|
|
|
static int __x86_pmu_event_init(struct perf_event *event)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
2009-03-31 01:07:16 +08:00
|
|
|
int err;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-04-29 18:47:20 +08:00
|
|
|
if (!x86_pmu_initialized())
|
|
|
|
return -ENODEV;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-03-31 01:07:16 +08:00
|
|
|
err = 0;
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
if (!atomic_inc_not_zero(&active_events)) {
|
2009-03-31 01:07:16 +08:00
|
|
|
mutex_lock(&pmc_reserve_mutex);
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
if (atomic_read(&active_events) == 0) {
|
2009-07-21 21:56:48 +08:00
|
|
|
if (!reserve_pmc_hardware())
|
|
|
|
err = -EBUSY;
|
2010-10-19 20:50:02 +08:00
|
|
|
else
|
|
|
|
reserve_ds_buffers();
|
2009-07-21 21:56:48 +08:00
|
|
|
}
|
|
|
|
if (!err)
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
atomic_inc(&active_events);
|
2009-03-31 01:07:16 +08:00
|
|
|
mutex_unlock(&pmc_reserve_mutex);
|
|
|
|
}
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
event->destroy = hw_perf_event_destroy;
|
2009-09-09 16:04:47 +08:00
|
|
|
|
2010-04-14 04:23:10 +08:00
|
|
|
event->hw.idx = -1;
|
|
|
|
event->hw.last_cpu = -1;
|
|
|
|
event->hw.last_tag = ~0ULL;
|
2009-10-06 22:42:09 +08:00
|
|
|
|
2010-04-14 04:23:12 +08:00
|
|
|
return x86_pmu.hw_config(event);
|
2010-04-14 04:23:10 +08:00
|
|
|
}
|
|
|
|
|
2010-01-29 20:25:31 +08:00
|
|
|
static void x86_pmu_disable_all(void)
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
{
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
2009-05-13 22:21:38 +08:00
|
|
|
int idx;
|
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
2009-03-06 01:08:27 +08:00
|
|
|
u64 val;
|
|
|
|
|
2009-04-29 22:55:56 +08:00
|
|
|
if (!test_bit(idx, cpuc->active_mask))
|
2009-04-29 18:47:01 +08:00
|
|
|
continue;
|
2011-02-03 00:40:57 +08:00
|
|
|
rdmsrl(x86_pmu_config_addr(idx), val);
|
2010-03-01 21:21:23 +08:00
|
|
|
if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
|
2009-04-29 18:47:01 +08:00
|
|
|
continue;
|
2010-03-01 21:21:23 +08:00
|
|
|
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
|
2011-02-03 00:40:57 +08:00
|
|
|
wrmsrl(x86_pmu_config_addr(idx), val);
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
static void x86_pmu_disable(struct pmu *pmu)
|
2009-02-27 20:39:09 +08:00
|
|
|
{
|
2010-01-18 16:58:01 +08:00
|
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
|
2009-04-29 18:47:20 +08:00
|
|
|
if (!x86_pmu_initialized())
|
2009-05-13 22:21:38 +08:00
|
|
|
return;
|
2010-01-18 16:58:01 +08:00
|
|
|
|
2010-01-28 06:07:47 +08:00
|
|
|
if (!cpuc->enabled)
|
|
|
|
return;
|
|
|
|
|
|
|
|
cpuc->n_added = 0;
|
|
|
|
cpuc->enabled = 0;
|
|
|
|
barrier();
|
2010-01-18 16:58:01 +08:00
|
|
|
|
|
|
|
x86_pmu.disable_all();
|
2009-02-27 20:39:09 +08:00
|
|
|
}
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2011-02-03 00:40:56 +08:00
|
|
|
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
|
|
|
|
u64 enable_mask)
|
|
|
|
{
|
2011-03-03 10:34:47 +08:00
|
|
|
if (hwc->extra_reg)
|
|
|
|
wrmsrl(hwc->extra_reg, hwc->extra_config);
|
2011-02-03 00:40:59 +08:00
|
|
|
wrmsrl(hwc->config_base, hwc->config | enable_mask);
|
2011-02-03 00:40:56 +08:00
|
|
|
}
|
|
|
|
|
2010-03-26 21:08:44 +08:00
|
|
|
static void x86_pmu_enable_all(int added)
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
{
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
int idx;
|
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
2011-02-03 00:40:56 +08:00
|
|
|
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
|
2009-03-06 01:08:27 +08:00
|
|
|
|
2009-04-29 22:55:56 +08:00
|
|
|
if (!test_bit(idx, cpuc->active_mask))
|
2009-04-29 18:47:01 +08:00
|
|
|
continue;
|
2009-07-10 15:59:56 +08:00
|
|
|
|
2011-02-03 00:40:56 +08:00
|
|
|
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-06-11 19:35:57 +08:00
|
|
|
static struct pmu pmu;
|
2010-01-18 16:58:01 +08:00
|
|
|
|
|
|
|
static inline int is_x86_event(struct perf_event *event)
|
|
|
|
{
|
|
|
|
return event->pmu == &pmu;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
|
|
|
|
{
|
2010-01-22 23:32:17 +08:00
|
|
|
struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
|
2010-01-18 16:58:01 +08:00
|
|
|
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
2010-01-22 23:40:12 +08:00
|
|
|
int i, j, w, wmax, num = 0;
|
2010-01-18 16:58:01 +08:00
|
|
|
struct hw_perf_event *hwc;
|
|
|
|
|
|
|
|
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
2010-02-01 22:36:30 +08:00
|
|
|
c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
|
|
|
|
constraints[i] = c;
|
2010-01-18 16:58:01 +08:00
|
|
|
}
|
|
|
|
|
2010-01-21 23:39:01 +08:00
|
|
|
/*
|
|
|
|
* fastpath, try to reuse previous register
|
|
|
|
*/
|
2010-01-22 23:40:12 +08:00
|
|
|
for (i = 0; i < n; i++) {
|
2010-01-21 23:39:01 +08:00
|
|
|
hwc = &cpuc->event_list[i]->hw;
|
2010-01-22 21:55:22 +08:00
|
|
|
c = constraints[i];
|
2010-01-21 23:39:01 +08:00
|
|
|
|
|
|
|
/* never assigned */
|
|
|
|
if (hwc->idx == -1)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* constraint still honored */
|
2010-01-22 23:32:17 +08:00
|
|
|
if (!test_bit(hwc->idx, c->idxmsk))
|
2010-01-21 23:39:01 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
/* not already used */
|
|
|
|
if (test_bit(hwc->idx, used_mask))
|
|
|
|
break;
|
|
|
|
|
2010-03-03 04:16:55 +08:00
|
|
|
__set_bit(hwc->idx, used_mask);
|
2010-01-21 23:39:01 +08:00
|
|
|
if (assign)
|
|
|
|
assign[i] = hwc->idx;
|
|
|
|
}
|
2010-01-22 23:40:12 +08:00
|
|
|
if (i == n)
|
2010-01-21 23:39:01 +08:00
|
|
|
goto done;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* begin slow path
|
|
|
|
*/
|
|
|
|
|
|
|
|
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
|
|
|
|
|
2010-01-18 16:58:01 +08:00
|
|
|
/*
|
|
|
|
* weight = number of possible counters
|
|
|
|
*
|
|
|
|
* 1 = most constrained, only works on one counter
|
|
|
|
* wmax = least constrained, works on any counter
|
|
|
|
*
|
|
|
|
* assign events to counters starting with most
|
|
|
|
* constrained events.
|
|
|
|
*/
|
2010-03-30 00:36:50 +08:00
|
|
|
wmax = x86_pmu.num_counters;
|
2010-01-18 16:58:01 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* when fixed event counters are present,
|
|
|
|
* wmax is incremented by 1 to account
|
|
|
|
* for one more choice
|
|
|
|
*/
|
2010-03-30 00:36:50 +08:00
|
|
|
if (x86_pmu.num_counters_fixed)
|
2010-01-18 16:58:01 +08:00
|
|
|
wmax++;
|
|
|
|
|
2010-01-21 23:39:01 +08:00
|
|
|
for (w = 1, num = n; num && w <= wmax; w++) {
|
2010-01-18 16:58:01 +08:00
|
|
|
/* for each event */
|
2010-01-21 23:39:01 +08:00
|
|
|
for (i = 0; num && i < n; i++) {
|
2010-01-22 21:55:22 +08:00
|
|
|
c = constraints[i];
|
2010-01-18 16:58:01 +08:00
|
|
|
hwc = &cpuc->event_list[i]->hw;
|
|
|
|
|
2010-01-22 23:32:17 +08:00
|
|
|
if (c->weight != w)
|
2010-01-18 16:58:01 +08:00
|
|
|
continue;
|
|
|
|
|
2010-03-06 05:41:37 +08:00
|
|
|
for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
|
2010-01-18 16:58:01 +08:00
|
|
|
if (!test_bit(j, used_mask))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (j == X86_PMC_IDX_MAX)
|
|
|
|
break;
|
|
|
|
|
2010-03-03 04:16:55 +08:00
|
|
|
__set_bit(j, used_mask);
|
2010-01-21 23:39:01 +08:00
|
|
|
|
2010-01-18 16:58:01 +08:00
|
|
|
if (assign)
|
|
|
|
assign[i] = j;
|
|
|
|
num--;
|
|
|
|
}
|
|
|
|
}
|
2010-01-21 23:39:01 +08:00
|
|
|
done:
|
2010-01-18 16:58:01 +08:00
|
|
|
/*
|
|
|
|
* scheduling failed or is just a simulation,
|
|
|
|
* free resources if necessary
|
|
|
|
*/
|
|
|
|
if (!assign || num) {
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
if (x86_pmu.put_event_constraints)
|
|
|
|
x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return num ? -ENOSPC : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* dogrp: true if must collect siblings events (group)
|
|
|
|
* returns total number of events and error code
|
|
|
|
*/
|
|
|
|
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
|
|
|
|
{
|
|
|
|
struct perf_event *event;
|
|
|
|
int n, max_count;
|
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
|
2010-01-18 16:58:01 +08:00
|
|
|
|
|
|
|
/* current number of events already accepted */
|
|
|
|
n = cpuc->n_events;
|
|
|
|
|
|
|
|
if (is_x86_event(leader)) {
|
|
|
|
if (n >= max_count)
|
|
|
|
return -ENOSPC;
|
|
|
|
cpuc->event_list[n] = leader;
|
|
|
|
n++;
|
|
|
|
}
|
|
|
|
if (!dogrp)
|
|
|
|
return n;
|
|
|
|
|
|
|
|
list_for_each_entry(event, &leader->sibling_list, group_entry) {
|
|
|
|
if (!is_x86_event(event) ||
|
2010-01-21 23:39:01 +08:00
|
|
|
event->state <= PERF_EVENT_STATE_OFF)
|
2010-01-18 16:58:01 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (n >= max_count)
|
|
|
|
return -ENOSPC;
|
|
|
|
|
|
|
|
cpuc->event_list[n] = event;
|
|
|
|
n++;
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void x86_assign_hw_event(struct perf_event *event,
|
2010-02-01 20:50:01 +08:00
|
|
|
struct cpu_hw_events *cpuc, int i)
|
2010-01-18 16:58:01 +08:00
|
|
|
{
|
2010-02-01 20:50:01 +08:00
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
|
|
|
|
hwc->idx = cpuc->assign[i];
|
|
|
|
hwc->last_cpu = smp_processor_id();
|
|
|
|
hwc->last_tag = ++cpuc->tags[i];
|
2010-01-18 16:58:01 +08:00
|
|
|
|
|
|
|
if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
|
|
|
|
hwc->config_base = 0;
|
|
|
|
hwc->event_base = 0;
|
|
|
|
} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
|
|
|
|
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
|
perf, x86: Fix Intel fixed counters base initialization
The following patch solves the problems introduced by Robert's
commit 41bf498 and reported by Arun Sharma. This commit gets rid
of the base + index notation for reading and writing PMU msrs.
The problem is that for fixed counters, the new calculation for
the base did not take into account the fixed counter indexes,
thus all fixed counters were read/written from fixed counter 0.
Although all fixed counters share the same config MSR, they each
have their own counter register.
Without:
$ task -e unhalted_core_cycles -e instructions_retired -e baclears noploop 1 noploop for 1 seconds
242202299 unhalted_core_cycles (0.00% scaling, ena=1000790892, run=1000790892)
2389685946 instructions_retired (0.00% scaling, ena=1000790892, run=1000790892)
49473 baclears (0.00% scaling, ena=1000790892, run=1000790892)
With:
$ task -e unhalted_core_cycles -e instructions_retired -e baclears noploop 1 noploop for 1 seconds
2392703238 unhalted_core_cycles (0.00% scaling, ena=1000840809, run=1000840809)
2389793744 instructions_retired (0.00% scaling, ena=1000840809, run=1000840809)
47863 baclears (0.00% scaling, ena=1000840809, run=1000840809)
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ming.m.lin@intel.com
Cc: robert.richter@amd.com
Cc: asharma@fb.com
Cc: perfmon2-devel@lists.sf.net
LKML-Reference: <20110319172005.GB4978@quad>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-03-20 01:20:05 +08:00
|
|
|
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
|
2010-01-18 16:58:01 +08:00
|
|
|
} else {
|
2011-02-03 00:40:59 +08:00
|
|
|
hwc->config_base = x86_pmu_config_addr(hwc->idx);
|
|
|
|
hwc->event_base = x86_pmu_event_addr(hwc->idx);
|
2010-01-18 16:58:01 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-02-01 20:50:01 +08:00
|
|
|
static inline int match_prev_assignment(struct hw_perf_event *hwc,
|
|
|
|
struct cpu_hw_events *cpuc,
|
|
|
|
int i)
|
|
|
|
{
|
|
|
|
return hwc->idx == cpuc->assign[i] &&
|
|
|
|
hwc->last_cpu == smp_processor_id() &&
|
|
|
|
hwc->last_tag == cpuc->tags[i];
|
|
|
|
}
|
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
static void x86_pmu_start(struct perf_event *event, int flags);
|
|
|
|
static void x86_pmu_stop(struct perf_event *event, int flags);
|
2010-01-25 22:58:43 +08:00
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
static void x86_pmu_enable(struct pmu *pmu)
|
2008-12-13 16:00:03 +08:00
|
|
|
{
|
2010-01-18 16:58:01 +08:00
|
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
struct perf_event *event;
|
|
|
|
struct hw_perf_event *hwc;
|
2010-03-26 21:08:44 +08:00
|
|
|
int i, added = cpuc->n_added;
|
2010-01-18 16:58:01 +08:00
|
|
|
|
2009-04-29 18:47:20 +08:00
|
|
|
if (!x86_pmu_initialized())
|
2008-12-15 01:36:30 +08:00
|
|
|
return;
|
2010-01-28 06:07:47 +08:00
|
|
|
|
|
|
|
if (cpuc->enabled)
|
|
|
|
return;
|
|
|
|
|
2010-01-18 16:58:01 +08:00
|
|
|
if (cpuc->n_added) {
|
2010-03-06 20:20:40 +08:00
|
|
|
int n_running = cpuc->n_events - cpuc->n_added;
|
2010-01-18 16:58:01 +08:00
|
|
|
/*
|
|
|
|
* apply assignment obtained either from
|
|
|
|
* hw_perf_group_sched_in() or x86_pmu_enable()
|
|
|
|
*
|
|
|
|
* step1: save events moving to new counters
|
|
|
|
* step2: reprogram moved events into new counters
|
|
|
|
*/
|
2010-03-06 20:20:40 +08:00
|
|
|
for (i = 0; i < n_running; i++) {
|
2010-01-18 16:58:01 +08:00
|
|
|
event = cpuc->event_list[i];
|
|
|
|
hwc = &event->hw;
|
|
|
|
|
2010-02-01 20:50:01 +08:00
|
|
|
/*
|
|
|
|
* we can avoid reprogramming counter if:
|
|
|
|
* - assigned same counter as last time
|
|
|
|
* - running on same CPU as last time
|
|
|
|
* - no other event has used the counter since
|
|
|
|
*/
|
|
|
|
if (hwc->idx == -1 ||
|
|
|
|
match_prev_assignment(hwc, cpuc, i))
|
2010-01-18 16:58:01 +08:00
|
|
|
continue;
|
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
/*
|
|
|
|
* Ensure we don't accidentally enable a stopped
|
|
|
|
* counter simply because we rescheduled.
|
|
|
|
*/
|
|
|
|
if (hwc->state & PERF_HES_STOPPED)
|
|
|
|
hwc->state |= PERF_HES_ARCH;
|
|
|
|
|
|
|
|
x86_pmu_stop(event, PERF_EF_UPDATE);
|
2010-01-18 16:58:01 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < cpuc->n_events; i++) {
|
|
|
|
event = cpuc->event_list[i];
|
|
|
|
hwc = &event->hw;
|
|
|
|
|
2010-03-11 20:40:30 +08:00
|
|
|
if (!match_prev_assignment(hwc, cpuc, i))
|
2010-02-01 20:50:01 +08:00
|
|
|
x86_assign_hw_event(event, cpuc, i);
|
2010-03-11 20:40:30 +08:00
|
|
|
else if (i < n_running)
|
|
|
|
continue;
|
2010-01-18 16:58:01 +08:00
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
if (hwc->state & PERF_HES_ARCH)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
x86_pmu_start(event, PERF_EF_RELOAD);
|
2010-01-18 16:58:01 +08:00
|
|
|
}
|
|
|
|
cpuc->n_added = 0;
|
|
|
|
perf_events_lapic_init();
|
|
|
|
}
|
2010-01-28 06:07:47 +08:00
|
|
|
|
|
|
|
cpuc->enabled = 1;
|
|
|
|
barrier();
|
|
|
|
|
2010-03-26 21:08:44 +08:00
|
|
|
x86_pmu.enable_all(added);
|
2008-12-13 16:00:03 +08:00
|
|
|
}
|
|
|
|
|
2010-03-03 03:32:08 +08:00
|
|
|
static inline void x86_pmu_disable_event(struct perf_event *event)
|
2009-03-06 01:08:27 +08:00
|
|
|
{
|
2010-03-03 03:32:08 +08:00
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
2010-03-08 20:51:31 +08:00
|
|
|
|
2011-02-03 00:40:59 +08:00
|
|
|
wrmsrl(hwc->config_base, hwc->config);
|
2009-03-06 01:08:27 +08:00
|
|
|
}
|
|
|
|
|
2009-06-24 14:13:48 +08:00
|
|
|
static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
/*
|
|
|
|
* Set the next IRQ period, based on the hwc->period_left value.
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
* To be called with the event disabled in hw:
|
2008-12-13 16:00:03 +08:00
|
|
|
*/
|
2009-06-02 22:08:20 +08:00
|
|
|
static int
|
2010-03-03 03:16:01 +08:00
|
|
|
x86_perf_event_set_period(struct perf_event *event)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
2010-03-03 03:16:01 +08:00
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
2010-05-21 20:43:08 +08:00
|
|
|
s64 left = local64_read(&hwc->period_left);
|
2009-06-02 22:08:20 +08:00
|
|
|
s64 period = hwc->sample_period;
|
2010-03-08 20:51:31 +08:00
|
|
|
int ret = 0, idx = hwc->idx;
|
2008-12-13 16:00:03 +08:00
|
|
|
|
2009-07-21 21:56:48 +08:00
|
|
|
if (idx == X86_PMC_IDX_FIXED_BTS)
|
|
|
|
return 0;
|
|
|
|
|
2008-12-13 16:00:03 +08:00
|
|
|
/*
|
tree-wide: fix assorted typos all over the place
That is "success", "unknown", "through", "performance", "[re|un]mapping"
, "access", "default", "reasonable", "[con]currently", "temperature"
, "channel", "[un]used", "application", "example","hierarchy", "therefore"
, "[over|under]flow", "contiguous", "threshold", "enough" and others.
Signed-off-by: André Goddard Rosa <andre.goddard@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
2009-11-14 23:09:05 +08:00
|
|
|
* If we are way outside a reasonable range then just skip forward:
|
2008-12-13 16:00:03 +08:00
|
|
|
*/
|
|
|
|
if (unlikely(left <= -period)) {
|
|
|
|
left = period;
|
2010-05-21 20:43:08 +08:00
|
|
|
local64_set(&hwc->period_left, left);
|
2009-06-11 03:34:59 +08:00
|
|
|
hwc->last_period = period;
|
2009-06-02 22:08:20 +08:00
|
|
|
ret = 1;
|
2008-12-13 16:00:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (unlikely(left <= 0)) {
|
|
|
|
left += period;
|
2010-05-21 20:43:08 +08:00
|
|
|
local64_set(&hwc->period_left, left);
|
2009-06-11 03:34:59 +08:00
|
|
|
hwc->last_period = period;
|
2009-06-02 22:08:20 +08:00
|
|
|
ret = 1;
|
2008-12-13 16:00:03 +08:00
|
|
|
}
|
2009-05-15 14:25:22 +08:00
|
|
|
/*
|
2009-09-21 17:31:35 +08:00
|
|
|
* Quirk: certain CPUs dont like it if just 1 hw_event is left:
|
2009-05-15 14:25:22 +08:00
|
|
|
*/
|
|
|
|
if (unlikely(left < 2))
|
|
|
|
left = 2;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-06-02 22:08:20 +08:00
|
|
|
if (left > x86_pmu.max_period)
|
|
|
|
left = x86_pmu.max_period;
|
|
|
|
|
2009-06-24 14:13:48 +08:00
|
|
|
per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
|
2008-12-13 16:00:03 +08:00
|
|
|
|
|
|
|
/*
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
* The hw event starts counting from this event offset,
|
2008-12-13 16:00:03 +08:00
|
|
|
* mark it to be able to extra future deltas:
|
|
|
|
*/
|
2010-05-21 20:43:08 +08:00
|
|
|
local64_set(&hwc->prev_count, (u64)-left);
|
2008-12-13 16:00:03 +08:00
|
|
|
|
2011-02-03 00:40:59 +08:00
|
|
|
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
|
2010-06-03 05:23:04 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Due to erratum on certan cpu we need
|
|
|
|
* a second write to be sure the register
|
|
|
|
* is updated properly
|
|
|
|
*/
|
|
|
|
if (x86_pmu.perfctr_second_write) {
|
2011-02-03 00:40:59 +08:00
|
|
|
wrmsrl(hwc->event_base,
|
2010-03-30 00:36:50 +08:00
|
|
|
(u64)(-left) & x86_pmu.cntval_mask);
|
2010-06-03 05:23:04 +08:00
|
|
|
}
|
2009-06-02 22:08:20 +08:00
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
perf_event_update_userpage(event);
|
2009-06-22 22:35:24 +08:00
|
|
|
|
2009-06-02 22:08:20 +08:00
|
|
|
return ret;
|
2008-12-22 18:10:42 +08:00
|
|
|
}
|
|
|
|
|
2010-03-03 03:32:08 +08:00
|
|
|
static void x86_pmu_enable_event(struct perf_event *event)
|
2009-04-29 18:47:18 +08:00
|
|
|
{
|
2010-12-18 23:28:55 +08:00
|
|
|
if (__this_cpu_read(cpu_hw_events.enabled))
|
2010-04-14 04:23:14 +08:00
|
|
|
__x86_pmu_enable_event(&event->hw,
|
|
|
|
ARCH_PERFMON_EVENTSEL_ENABLE);
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
2009-10-06 22:42:09 +08:00
|
|
|
/*
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
* Add a single event to the PMU.
|
2010-01-18 16:58:01 +08:00
|
|
|
*
|
|
|
|
* The event is added to the group of enabled events
|
|
|
|
* but only if it can be scehduled with existing events.
|
2009-10-08 17:56:07 +08:00
|
|
|
*/
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
static int x86_pmu_add(struct perf_event *event, int flags)
|
2009-10-08 17:56:07 +08:00
|
|
|
{
|
|
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
2010-01-18 16:58:01 +08:00
|
|
|
struct hw_perf_event *hwc;
|
|
|
|
int assign[X86_PMC_IDX_MAX];
|
|
|
|
int n, n0, ret;
|
2009-10-08 17:56:07 +08:00
|
|
|
|
2010-01-18 16:58:01 +08:00
|
|
|
hwc = &event->hw;
|
2009-10-08 17:56:07 +08:00
|
|
|
|
2010-06-14 14:49:00 +08:00
|
|
|
perf_pmu_disable(event->pmu);
|
2010-01-18 16:58:01 +08:00
|
|
|
n0 = cpuc->n_events;
|
2010-06-11 23:32:03 +08:00
|
|
|
ret = n = collect_events(cpuc, event, false);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
2009-05-26 03:41:28 +08:00
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
|
|
|
if (!(flags & PERF_EF_START))
|
|
|
|
hwc->state |= PERF_HES_ARCH;
|
|
|
|
|
2010-04-23 13:56:12 +08:00
|
|
|
/*
|
|
|
|
* If group events scheduling transaction was started,
|
2011-03-18 03:24:16 +08:00
|
|
|
* skip the schedulability test here, it will be performed
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
* at commit time (->commit_txn) as a whole
|
2010-04-23 13:56:12 +08:00
|
|
|
*/
|
2010-05-25 23:49:05 +08:00
|
|
|
if (cpuc->group_flag & PERF_EVENT_TXN)
|
2010-06-11 23:32:03 +08:00
|
|
|
goto done_collect;
|
2010-04-23 13:56:12 +08:00
|
|
|
|
2010-03-12 00:54:39 +08:00
|
|
|
ret = x86_pmu.schedule_events(cpuc, n, assign);
|
2010-01-18 16:58:01 +08:00
|
|
|
if (ret)
|
2010-06-11 23:32:03 +08:00
|
|
|
goto out;
|
2010-01-18 16:58:01 +08:00
|
|
|
/*
|
|
|
|
* copy new assignment, now we know it is possible
|
|
|
|
* will be used by hw_perf_enable()
|
|
|
|
*/
|
|
|
|
memcpy(cpuc->assign, assign, n*sizeof(int));
|
2008-12-09 18:40:46 +08:00
|
|
|
|
2010-06-11 23:32:03 +08:00
|
|
|
done_collect:
|
2010-01-18 16:58:01 +08:00
|
|
|
cpuc->n_events = n;
|
2010-03-06 20:49:56 +08:00
|
|
|
cpuc->n_added += n - n0;
|
perf_events: Fix event scheduling issues introduced by transactional API
The transactional API patch between the generic and model-specific
code introduced several important bugs with event scheduling, at
least on X86. If you had pinned events, e.g., watchdog, and were
over-committing the PMU, you would get bogus counts. The bug was
showing up on Intel CPU because events would move around more
often that on AMD. But the problem also existed on AMD, though
harder to expose.
The issues were:
- group_sched_in() was missing a cancel_txn() in the error path
- cpuc->n_added was not properly maintained, leading to missing
actions in hw_perf_enable(), i.e., n_running being 0. You cannot
update n_added until you know the transaction has succeeded. In
case of failed transaction n_added was not adjusted back.
- in case of failed transactions, event_sched_out() was called
and eventually invoked x86_disable_event() to touch the HW reg.
But with transactions, on X86, event_sched_in() does not touch
HW registers, it simply collects events into a list. Thus, you
could end up calling x86_disable_event() on a counter which
did not correspond to the current event when idx != -1.
The patch modifies the generic and X86 code to avoid all those problems.
First, we keep track of the number of events added last. In case the
transaction fails, we substract them from n_added. This approach is
necessary (as opposed to delaying updates to n_added) because not all
event updates use the transaction API, e.g., single events.
Second, we encapsulate the event_sched_in() and event_sched_out() in
group_sched_in() inside the transaction. That makes the operations
symmetrical and you can also detect that you are inside a transaction
and skip the HW reg access by checking cpuc->group_flag.
With this patch, you can now overcommit the PMU even with pinned
system-wide events present and still get valid counts.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1274796225.5882.1389.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-05-25 22:23:10 +08:00
|
|
|
cpuc->n_txn += n - n0;
|
2008-12-21 20:50:42 +08:00
|
|
|
|
2010-06-11 23:32:03 +08:00
|
|
|
ret = 0;
|
|
|
|
out:
|
2010-06-14 14:49:00 +08:00
|
|
|
perf_pmu_enable(event->pmu);
|
2010-06-11 23:32:03 +08:00
|
|
|
return ret;
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
static void x86_pmu_start(struct perf_event *event, int flags)
|
2010-02-08 23:06:01 +08:00
|
|
|
{
|
2010-03-06 20:19:24 +08:00
|
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
int idx = event->hw.idx;
|
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (WARN_ON_ONCE(idx == -1))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (flags & PERF_EF_RELOAD) {
|
|
|
|
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
|
|
|
|
x86_perf_event_set_period(event);
|
|
|
|
}
|
|
|
|
|
|
|
|
event->hw.state = 0;
|
2010-02-08 23:06:01 +08:00
|
|
|
|
2010-03-06 20:19:24 +08:00
|
|
|
cpuc->events[idx] = event;
|
|
|
|
__set_bit(idx, cpuc->active_mask);
|
2010-09-16 00:20:34 +08:00
|
|
|
__set_bit(idx, cpuc->running);
|
2010-03-03 03:32:08 +08:00
|
|
|
x86_pmu.enable(event);
|
2010-03-06 20:19:24 +08:00
|
|
|
perf_event_update_userpage(event);
|
2009-05-25 23:39:05 +08:00
|
|
|
}
|
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
void perf_event_print_debug(void)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
2008-12-22 18:10:42 +08:00
|
|
|
u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
u64 pebs;
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
struct cpu_hw_events *cpuc;
|
2009-05-13 14:12:51 +08:00
|
|
|
unsigned long flags;
|
2008-12-09 19:18:18 +08:00
|
|
|
int cpu, idx;
|
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
if (!x86_pmu.num_counters)
|
2008-12-09 19:18:18 +08:00
|
|
|
return;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-05-13 14:12:51 +08:00
|
|
|
local_irq_save(flags);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
|
|
|
cpu = smp_processor_id();
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
cpuc = &per_cpu(cpu_hw_events, cpu);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-04-29 18:47:13 +08:00
|
|
|
if (x86_pmu.version >= 2) {
|
2009-02-28 21:15:39 +08:00
|
|
|
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
|
|
|
|
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
|
|
|
|
rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
|
|
|
|
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
|
2009-02-28 21:15:39 +08:00
|
|
|
|
|
|
|
pr_info("\n");
|
|
|
|
pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
|
|
|
|
pr_info("CPU#%d: status: %016llx\n", cpu, status);
|
|
|
|
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
|
|
|
|
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
}
|
2010-03-08 20:51:31 +08:00
|
|
|
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
2011-02-03 00:40:57 +08:00
|
|
|
rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
|
|
|
|
rdmsrl(x86_pmu_event_addr(idx), pmc_count);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-06-24 14:13:48 +08:00
|
|
|
prev_left = per_cpu(pmc_prev_left[idx], cpu);
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-02-28 21:15:39 +08:00
|
|
|
pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
|
2008-12-03 17:39:53 +08:00
|
|
|
cpu, idx, pmc_ctrl);
|
2009-02-28 21:15:39 +08:00
|
|
|
pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
|
2008-12-03 17:39:53 +08:00
|
|
|
cpu, idx, pmc_count);
|
2009-02-28 21:15:39 +08:00
|
|
|
pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
|
2008-12-13 16:00:03 +08:00
|
|
|
cpu, idx, prev_left);
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
2010-03-30 00:36:50 +08:00
|
|
|
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
|
2008-12-22 18:10:42 +08:00
|
|
|
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
|
|
|
|
|
2009-02-28 21:15:39 +08:00
|
|
|
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
|
2008-12-22 18:10:42 +08:00
|
|
|
cpu, idx, pmc_count);
|
|
|
|
}
|
2009-05-13 14:12:51 +08:00
|
|
|
local_irq_restore(flags);
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
static void x86_pmu_stop(struct perf_event *event, int flags)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
2010-02-08 23:06:01 +08:00
|
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
|
|
|
|
x86_pmu.disable(event);
|
|
|
|
cpuc->events[hwc->idx] = NULL;
|
|
|
|
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
|
|
|
|
hwc->state |= PERF_HES_STOPPED;
|
|
|
|
}
|
2009-07-21 21:56:48 +08:00
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
|
|
|
|
/*
|
|
|
|
* Drain the remaining delta count out of a event
|
|
|
|
* that we are disabling:
|
|
|
|
*/
|
|
|
|
x86_perf_event_update(event);
|
|
|
|
hwc->state |= PERF_HES_UPTODATE;
|
|
|
|
}
|
2010-01-25 22:58:43 +08:00
|
|
|
}
|
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
static void x86_pmu_del(struct perf_event *event, int flags)
|
2010-01-25 22:58:43 +08:00
|
|
|
{
|
|
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
int i;
|
|
|
|
|
perf_events: Fix event scheduling issues introduced by transactional API
The transactional API patch between the generic and model-specific
code introduced several important bugs with event scheduling, at
least on X86. If you had pinned events, e.g., watchdog, and were
over-committing the PMU, you would get bogus counts. The bug was
showing up on Intel CPU because events would move around more
often that on AMD. But the problem also existed on AMD, though
harder to expose.
The issues were:
- group_sched_in() was missing a cancel_txn() in the error path
- cpuc->n_added was not properly maintained, leading to missing
actions in hw_perf_enable(), i.e., n_running being 0. You cannot
update n_added until you know the transaction has succeeded. In
case of failed transaction n_added was not adjusted back.
- in case of failed transactions, event_sched_out() was called
and eventually invoked x86_disable_event() to touch the HW reg.
But with transactions, on X86, event_sched_in() does not touch
HW registers, it simply collects events into a list. Thus, you
could end up calling x86_disable_event() on a counter which
did not correspond to the current event when idx != -1.
The patch modifies the generic and X86 code to avoid all those problems.
First, we keep track of the number of events added last. In case the
transaction fails, we substract them from n_added. This approach is
necessary (as opposed to delaying updates to n_added) because not all
event updates use the transaction API, e.g., single events.
Second, we encapsulate the event_sched_in() and event_sched_out() in
group_sched_in() inside the transaction. That makes the operations
symmetrical and you can also detect that you are inside a transaction
and skip the HW reg access by checking cpuc->group_flag.
With this patch, you can now overcommit the PMU even with pinned
system-wide events present and still get valid counts.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1274796225.5882.1389.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-05-25 22:23:10 +08:00
|
|
|
/*
|
|
|
|
* If we're called during a txn, we don't need to do anything.
|
|
|
|
* The events never got scheduled and ->cancel_txn will truncate
|
|
|
|
* the event_list.
|
|
|
|
*/
|
2010-05-25 23:49:05 +08:00
|
|
|
if (cpuc->group_flag & PERF_EVENT_TXN)
|
perf_events: Fix event scheduling issues introduced by transactional API
The transactional API patch between the generic and model-specific
code introduced several important bugs with event scheduling, at
least on X86. If you had pinned events, e.g., watchdog, and were
over-committing the PMU, you would get bogus counts. The bug was
showing up on Intel CPU because events would move around more
often that on AMD. But the problem also existed on AMD, though
harder to expose.
The issues were:
- group_sched_in() was missing a cancel_txn() in the error path
- cpuc->n_added was not properly maintained, leading to missing
actions in hw_perf_enable(), i.e., n_running being 0. You cannot
update n_added until you know the transaction has succeeded. In
case of failed transaction n_added was not adjusted back.
- in case of failed transactions, event_sched_out() was called
and eventually invoked x86_disable_event() to touch the HW reg.
But with transactions, on X86, event_sched_in() does not touch
HW registers, it simply collects events into a list. Thus, you
could end up calling x86_disable_event() on a counter which
did not correspond to the current event when idx != -1.
The patch modifies the generic and X86 code to avoid all those problems.
First, we keep track of the number of events added last. In case the
transaction fails, we substract them from n_added. This approach is
necessary (as opposed to delaying updates to n_added) because not all
event updates use the transaction API, e.g., single events.
Second, we encapsulate the event_sched_in() and event_sched_out() in
group_sched_in() inside the transaction. That makes the operations
symmetrical and you can also detect that you are inside a transaction
and skip the HW reg access by checking cpuc->group_flag.
With this patch, you can now overcommit the PMU even with pinned
system-wide events present and still get valid counts.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1274796225.5882.1389.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-05-25 22:23:10 +08:00
|
|
|
return;
|
|
|
|
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
x86_pmu_stop(event, PERF_EF_UPDATE);
|
2009-06-22 22:35:24 +08:00
|
|
|
|
2010-01-18 16:58:01 +08:00
|
|
|
for (i = 0; i < cpuc->n_events; i++) {
|
|
|
|
if (event == cpuc->event_list[i]) {
|
|
|
|
|
|
|
|
if (x86_pmu.put_event_constraints)
|
|
|
|
x86_pmu.put_event_constraints(cpuc, event);
|
|
|
|
|
|
|
|
while (++i < cpuc->n_events)
|
|
|
|
cpuc->event_list[i-1] = cpuc->event_list[i];
|
|
|
|
|
|
|
|
--cpuc->n_events;
|
2010-01-25 18:57:25 +08:00
|
|
|
break;
|
2010-01-18 16:58:01 +08:00
|
|
|
}
|
|
|
|
}
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
perf_event_update_userpage(event);
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
2010-01-29 20:25:31 +08:00
|
|
|
static int x86_pmu_handle_irq(struct pt_regs *regs)
|
2009-04-29 18:47:21 +08:00
|
|
|
{
|
2009-06-11 03:02:22 +08:00
|
|
|
struct perf_sample_data data;
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
struct cpu_hw_events *cpuc;
|
|
|
|
struct perf_event *event;
|
2009-07-09 05:46:14 +08:00
|
|
|
int idx, handled = 0;
|
2009-05-15 14:26:20 +08:00
|
|
|
u64 val;
|
|
|
|
|
2010-03-03 22:55:04 +08:00
|
|
|
perf_sample_data_init(&data, 0);
|
2009-06-11 03:02:22 +08:00
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
cpuc = &__get_cpu_var(cpu_hw_events);
|
2009-05-13 19:21:36 +08:00
|
|
|
|
2011-04-27 18:32:33 +08:00
|
|
|
/*
|
|
|
|
* Some chipsets need to unmask the LVTPC in a particular spot
|
|
|
|
* inside the nmi handler. As a result, the unmasking was pushed
|
|
|
|
* into all the nmi handlers.
|
|
|
|
*
|
|
|
|
* This generic handler doesn't seem to have any issues where the
|
|
|
|
* unmasking occurs so it was left at the top.
|
|
|
|
*/
|
|
|
|
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
|
2010-09-16 00:20:34 +08:00
|
|
|
if (!test_bit(idx, cpuc->active_mask)) {
|
|
|
|
/*
|
|
|
|
* Though we deactivated the counter some cpus
|
|
|
|
* might still deliver spurious interrupts still
|
|
|
|
* in flight. Catch them:
|
|
|
|
*/
|
|
|
|
if (__test_and_clear_bit(idx, cpuc->running))
|
|
|
|
handled++;
|
2009-04-29 18:47:21 +08:00
|
|
|
continue;
|
2010-09-16 00:20:34 +08:00
|
|
|
}
|
2009-05-13 19:21:36 +08:00
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
event = cpuc->events[idx];
|
2009-05-14 20:52:17 +08:00
|
|
|
|
2010-03-03 03:18:39 +08:00
|
|
|
val = x86_perf_event_update(event);
|
2010-03-30 00:36:50 +08:00
|
|
|
if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
|
2009-05-25 23:39:04 +08:00
|
|
|
continue;
|
2009-05-13 19:21:36 +08:00
|
|
|
|
2009-06-11 03:34:59 +08:00
|
|
|
/*
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
* event overflow
|
2009-06-11 03:34:59 +08:00
|
|
|
*/
|
perf, x86: Try to handle unknown nmis with an enabled PMU
When the PMU is enabled it is valid to have unhandled nmis, two
events could trigger 'simultaneously' raising two back-to-back
NMIs. If the first NMI handles both, the latter will be empty
and daze the CPU.
The solution to avoid an 'unknown nmi' massage in this case was
simply to stop the nmi handler chain when the PMU is enabled by
stating the nmi was handled. This has the drawback that a) we
can not detect unknown nmis anymore, and b) subsequent nmi
handlers are not called.
This patch addresses this. Now, we check this unknown NMI if it
could be a PMU back-to-back NMI. Otherwise we pass it and let
the kernel handle the unknown nmi.
This is a debug log:
cpu #6, nmi #32333, skip_nmi #32330, handled = 1, time = 1934364430
cpu #6, nmi #32334, skip_nmi #32330, handled = 1, time = 1934704616
cpu #6, nmi #32335, skip_nmi #32336, handled = 2, time = 1936032320
cpu #6, nmi #32336, skip_nmi #32336, handled = 0, time = 1936034139
cpu #6, nmi #32337, skip_nmi #32336, handled = 1, time = 1936120100
cpu #6, nmi #32338, skip_nmi #32336, handled = 1, time = 1936404607
cpu #6, nmi #32339, skip_nmi #32336, handled = 1, time = 1937983416
cpu #6, nmi #32340, skip_nmi #32341, handled = 2, time = 1938201032
cpu #6, nmi #32341, skip_nmi #32341, handled = 0, time = 1938202830
cpu #6, nmi #32342, skip_nmi #32341, handled = 1, time = 1938443743
cpu #6, nmi #32343, skip_nmi #32341, handled = 1, time = 1939956552
cpu #6, nmi #32344, skip_nmi #32341, handled = 1, time = 1940073224
cpu #6, nmi #32345, skip_nmi #32341, handled = 1, time = 1940485677
cpu #6, nmi #32346, skip_nmi #32347, handled = 2, time = 1941947772
cpu #6, nmi #32347, skip_nmi #32347, handled = 1, time = 1941949818
cpu #6, nmi #32348, skip_nmi #32347, handled = 0, time = 1941951591
Uhhuh. NMI received for unknown reason 00 on CPU 6.
Do you have a strange power saving mode enabled?
Dazed and confused, but trying to continue
Deltas:
nmi #32334 340186
nmi #32335 1327704
nmi #32336 1819 <<<< back-to-back nmi [1]
nmi #32337 85961
nmi #32338 284507
nmi #32339 1578809
nmi #32340 217616
nmi #32341 1798 <<<< back-to-back nmi [2]
nmi #32342 240913
nmi #32343 1512809
nmi #32344 116672
nmi #32345 412453
nmi #32346 1462095 <<<< 1st nmi (standard) handling 2 counters
nmi #32347 2046 <<<< 2nd nmi (back-to-back) handling one
counter nmi #32348 1773 <<<< 3rd nmi (back-to-back)
handling no counter! [3]
For back-to-back nmi detection there are the following rules:
The PMU nmi handler was handling more than one counter and no
counter was handled in the subsequent nmi (see [1] and [2]
above).
There is another case if there are two subsequent back-to-back
nmis [3]. The 2nd is detected as back-to-back because the first
handled more than one counter. If the second handles one counter
and the 3rd handles nothing, we drop the 3rd nmi because it
could be a back-to-back nmi.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
[ renamed nmi variable to pmu_nmi to avoid clash with .nmi in entry.S ]
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: fweisbec@gmail.com
Cc: ying.huang@intel.com
Cc: ming.m.lin@intel.com
Cc: eranian@google.com
LKML-Reference: <1283454469-1909-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-09-03 03:07:48 +08:00
|
|
|
handled++;
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
data.period = event->hw.last_period;
|
2009-06-11 03:34:59 +08:00
|
|
|
|
2010-03-03 03:16:01 +08:00
|
|
|
if (!x86_perf_event_set_period(event))
|
2009-06-02 22:08:20 +08:00
|
|
|
continue;
|
|
|
|
|
2011-06-27 20:41:57 +08:00
|
|
|
if (perf_event_overflow(event, &data, regs))
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
x86_pmu_stop(event, 0);
|
2009-04-29 18:47:21 +08:00
|
|
|
}
|
2009-05-13 19:21:36 +08:00
|
|
|
|
2009-06-11 03:34:59 +08:00
|
|
|
if (handled)
|
|
|
|
inc_irq_stat(apic_perf_irqs);
|
|
|
|
|
2009-04-29 18:47:21 +08:00
|
|
|
return handled;
|
|
|
|
}
|
2009-04-29 18:47:05 +08:00
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
void perf_events_lapic_init(void)
|
2008-12-03 17:39:53 +08:00
|
|
|
{
|
2009-08-11 16:40:08 +08:00
|
|
|
if (!x86_pmu.apic || !x86_pmu_initialized())
|
2008-12-03 17:39:53 +08:00
|
|
|
return;
|
2009-04-29 18:47:20 +08:00
|
|
|
|
2008-12-03 17:39:53 +08:00
|
|
|
/*
|
2009-05-29 13:28:35 +08:00
|
|
|
* Always use NMI for PMU
|
2008-12-03 17:39:53 +08:00
|
|
|
*/
|
2009-05-29 13:28:35 +08:00
|
|
|
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
perf, x86: Try to handle unknown nmis with an enabled PMU
When the PMU is enabled it is valid to have unhandled nmis, two
events could trigger 'simultaneously' raising two back-to-back
NMIs. If the first NMI handles both, the latter will be empty
and daze the CPU.
The solution to avoid an 'unknown nmi' massage in this case was
simply to stop the nmi handler chain when the PMU is enabled by
stating the nmi was handled. This has the drawback that a) we
can not detect unknown nmis anymore, and b) subsequent nmi
handlers are not called.
This patch addresses this. Now, we check this unknown NMI if it
could be a PMU back-to-back NMI. Otherwise we pass it and let
the kernel handle the unknown nmi.
This is a debug log:
cpu #6, nmi #32333, skip_nmi #32330, handled = 1, time = 1934364430
cpu #6, nmi #32334, skip_nmi #32330, handled = 1, time = 1934704616
cpu #6, nmi #32335, skip_nmi #32336, handled = 2, time = 1936032320
cpu #6, nmi #32336, skip_nmi #32336, handled = 0, time = 1936034139
cpu #6, nmi #32337, skip_nmi #32336, handled = 1, time = 1936120100
cpu #6, nmi #32338, skip_nmi #32336, handled = 1, time = 1936404607
cpu #6, nmi #32339, skip_nmi #32336, handled = 1, time = 1937983416
cpu #6, nmi #32340, skip_nmi #32341, handled = 2, time = 1938201032
cpu #6, nmi #32341, skip_nmi #32341, handled = 0, time = 1938202830
cpu #6, nmi #32342, skip_nmi #32341, handled = 1, time = 1938443743
cpu #6, nmi #32343, skip_nmi #32341, handled = 1, time = 1939956552
cpu #6, nmi #32344, skip_nmi #32341, handled = 1, time = 1940073224
cpu #6, nmi #32345, skip_nmi #32341, handled = 1, time = 1940485677
cpu #6, nmi #32346, skip_nmi #32347, handled = 2, time = 1941947772
cpu #6, nmi #32347, skip_nmi #32347, handled = 1, time = 1941949818
cpu #6, nmi #32348, skip_nmi #32347, handled = 0, time = 1941951591
Uhhuh. NMI received for unknown reason 00 on CPU 6.
Do you have a strange power saving mode enabled?
Dazed and confused, but trying to continue
Deltas:
nmi #32334 340186
nmi #32335 1327704
nmi #32336 1819 <<<< back-to-back nmi [1]
nmi #32337 85961
nmi #32338 284507
nmi #32339 1578809
nmi #32340 217616
nmi #32341 1798 <<<< back-to-back nmi [2]
nmi #32342 240913
nmi #32343 1512809
nmi #32344 116672
nmi #32345 412453
nmi #32346 1462095 <<<< 1st nmi (standard) handling 2 counters
nmi #32347 2046 <<<< 2nd nmi (back-to-back) handling one
counter nmi #32348 1773 <<<< 3rd nmi (back-to-back)
handling no counter! [3]
For back-to-back nmi detection there are the following rules:
The PMU nmi handler was handling more than one counter and no
counter was handled in the subsequent nmi (see [1] and [2]
above).
There is another case if there are two subsequent back-to-back
nmis [3]. The 2nd is detected as back-to-back because the first
handled more than one counter. If the second handles one counter
and the 3rd handles nothing, we drop the 3rd nmi because it
could be a back-to-back nmi.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
[ renamed nmi variable to pmu_nmi to avoid clash with .nmi in entry.S ]
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: fweisbec@gmail.com
Cc: ying.huang@intel.com
Cc: ming.m.lin@intel.com
Cc: eranian@google.com
LKML-Reference: <1283454469-1909-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-09-03 03:07:48 +08:00
|
|
|
struct pmu_nmi_state {
|
|
|
|
unsigned int marked;
|
|
|
|
int handled;
|
|
|
|
};
|
|
|
|
|
|
|
|
static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
|
|
|
|
|
2008-12-03 17:39:53 +08:00
|
|
|
static int __kprobes
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
perf_event_nmi_handler(struct notifier_block *self,
|
2008-12-03 17:39:53 +08:00
|
|
|
unsigned long cmd, void *__args)
|
|
|
|
{
|
|
|
|
struct die_args *args = __args;
|
perf, x86: Try to handle unknown nmis with an enabled PMU
When the PMU is enabled it is valid to have unhandled nmis, two
events could trigger 'simultaneously' raising two back-to-back
NMIs. If the first NMI handles both, the latter will be empty
and daze the CPU.
The solution to avoid an 'unknown nmi' massage in this case was
simply to stop the nmi handler chain when the PMU is enabled by
stating the nmi was handled. This has the drawback that a) we
can not detect unknown nmis anymore, and b) subsequent nmi
handlers are not called.
This patch addresses this. Now, we check this unknown NMI if it
could be a PMU back-to-back NMI. Otherwise we pass it and let
the kernel handle the unknown nmi.
This is a debug log:
cpu #6, nmi #32333, skip_nmi #32330, handled = 1, time = 1934364430
cpu #6, nmi #32334, skip_nmi #32330, handled = 1, time = 1934704616
cpu #6, nmi #32335, skip_nmi #32336, handled = 2, time = 1936032320
cpu #6, nmi #32336, skip_nmi #32336, handled = 0, time = 1936034139
cpu #6, nmi #32337, skip_nmi #32336, handled = 1, time = 1936120100
cpu #6, nmi #32338, skip_nmi #32336, handled = 1, time = 1936404607
cpu #6, nmi #32339, skip_nmi #32336, handled = 1, time = 1937983416
cpu #6, nmi #32340, skip_nmi #32341, handled = 2, time = 1938201032
cpu #6, nmi #32341, skip_nmi #32341, handled = 0, time = 1938202830
cpu #6, nmi #32342, skip_nmi #32341, handled = 1, time = 1938443743
cpu #6, nmi #32343, skip_nmi #32341, handled = 1, time = 1939956552
cpu #6, nmi #32344, skip_nmi #32341, handled = 1, time = 1940073224
cpu #6, nmi #32345, skip_nmi #32341, handled = 1, time = 1940485677
cpu #6, nmi #32346, skip_nmi #32347, handled = 2, time = 1941947772
cpu #6, nmi #32347, skip_nmi #32347, handled = 1, time = 1941949818
cpu #6, nmi #32348, skip_nmi #32347, handled = 0, time = 1941951591
Uhhuh. NMI received for unknown reason 00 on CPU 6.
Do you have a strange power saving mode enabled?
Dazed and confused, but trying to continue
Deltas:
nmi #32334 340186
nmi #32335 1327704
nmi #32336 1819 <<<< back-to-back nmi [1]
nmi #32337 85961
nmi #32338 284507
nmi #32339 1578809
nmi #32340 217616
nmi #32341 1798 <<<< back-to-back nmi [2]
nmi #32342 240913
nmi #32343 1512809
nmi #32344 116672
nmi #32345 412453
nmi #32346 1462095 <<<< 1st nmi (standard) handling 2 counters
nmi #32347 2046 <<<< 2nd nmi (back-to-back) handling one
counter nmi #32348 1773 <<<< 3rd nmi (back-to-back)
handling no counter! [3]
For back-to-back nmi detection there are the following rules:
The PMU nmi handler was handling more than one counter and no
counter was handled in the subsequent nmi (see [1] and [2]
above).
There is another case if there are two subsequent back-to-back
nmis [3]. The 2nd is detected as back-to-back because the first
handled more than one counter. If the second handles one counter
and the 3rd handles nothing, we drop the 3rd nmi because it
could be a back-to-back nmi.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
[ renamed nmi variable to pmu_nmi to avoid clash with .nmi in entry.S ]
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: fweisbec@gmail.com
Cc: ying.huang@intel.com
Cc: ming.m.lin@intel.com
Cc: eranian@google.com
LKML-Reference: <1283454469-1909-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-09-03 03:07:48 +08:00
|
|
|
unsigned int this_nmi;
|
|
|
|
int handled;
|
2009-03-06 01:08:27 +08:00
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
if (!atomic_read(&active_events))
|
2009-05-01 18:23:17 +08:00
|
|
|
return NOTIFY_DONE;
|
|
|
|
|
2009-03-06 01:08:27 +08:00
|
|
|
switch (cmd) {
|
|
|
|
case DIE_NMI:
|
|
|
|
break;
|
perf, x86: Try to handle unknown nmis with an enabled PMU
When the PMU is enabled it is valid to have unhandled nmis, two
events could trigger 'simultaneously' raising two back-to-back
NMIs. If the first NMI handles both, the latter will be empty
and daze the CPU.
The solution to avoid an 'unknown nmi' massage in this case was
simply to stop the nmi handler chain when the PMU is enabled by
stating the nmi was handled. This has the drawback that a) we
can not detect unknown nmis anymore, and b) subsequent nmi
handlers are not called.
This patch addresses this. Now, we check this unknown NMI if it
could be a PMU back-to-back NMI. Otherwise we pass it and let
the kernel handle the unknown nmi.
This is a debug log:
cpu #6, nmi #32333, skip_nmi #32330, handled = 1, time = 1934364430
cpu #6, nmi #32334, skip_nmi #32330, handled = 1, time = 1934704616
cpu #6, nmi #32335, skip_nmi #32336, handled = 2, time = 1936032320
cpu #6, nmi #32336, skip_nmi #32336, handled = 0, time = 1936034139
cpu #6, nmi #32337, skip_nmi #32336, handled = 1, time = 1936120100
cpu #6, nmi #32338, skip_nmi #32336, handled = 1, time = 1936404607
cpu #6, nmi #32339, skip_nmi #32336, handled = 1, time = 1937983416
cpu #6, nmi #32340, skip_nmi #32341, handled = 2, time = 1938201032
cpu #6, nmi #32341, skip_nmi #32341, handled = 0, time = 1938202830
cpu #6, nmi #32342, skip_nmi #32341, handled = 1, time = 1938443743
cpu #6, nmi #32343, skip_nmi #32341, handled = 1, time = 1939956552
cpu #6, nmi #32344, skip_nmi #32341, handled = 1, time = 1940073224
cpu #6, nmi #32345, skip_nmi #32341, handled = 1, time = 1940485677
cpu #6, nmi #32346, skip_nmi #32347, handled = 2, time = 1941947772
cpu #6, nmi #32347, skip_nmi #32347, handled = 1, time = 1941949818
cpu #6, nmi #32348, skip_nmi #32347, handled = 0, time = 1941951591
Uhhuh. NMI received for unknown reason 00 on CPU 6.
Do you have a strange power saving mode enabled?
Dazed and confused, but trying to continue
Deltas:
nmi #32334 340186
nmi #32335 1327704
nmi #32336 1819 <<<< back-to-back nmi [1]
nmi #32337 85961
nmi #32338 284507
nmi #32339 1578809
nmi #32340 217616
nmi #32341 1798 <<<< back-to-back nmi [2]
nmi #32342 240913
nmi #32343 1512809
nmi #32344 116672
nmi #32345 412453
nmi #32346 1462095 <<<< 1st nmi (standard) handling 2 counters
nmi #32347 2046 <<<< 2nd nmi (back-to-back) handling one
counter nmi #32348 1773 <<<< 3rd nmi (back-to-back)
handling no counter! [3]
For back-to-back nmi detection there are the following rules:
The PMU nmi handler was handling more than one counter and no
counter was handled in the subsequent nmi (see [1] and [2]
above).
There is another case if there are two subsequent back-to-back
nmis [3]. The 2nd is detected as back-to-back because the first
handled more than one counter. If the second handles one counter
and the 3rd handles nothing, we drop the 3rd nmi because it
could be a back-to-back nmi.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
[ renamed nmi variable to pmu_nmi to avoid clash with .nmi in entry.S ]
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: fweisbec@gmail.com
Cc: ying.huang@intel.com
Cc: ming.m.lin@intel.com
Cc: eranian@google.com
LKML-Reference: <1283454469-1909-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-09-03 03:07:48 +08:00
|
|
|
case DIE_NMIUNKNOWN:
|
|
|
|
this_nmi = percpu_read(irq_stat.__nmi_count);
|
2010-12-18 23:28:55 +08:00
|
|
|
if (this_nmi != __this_cpu_read(pmu_nmi.marked))
|
perf, x86: Try to handle unknown nmis with an enabled PMU
When the PMU is enabled it is valid to have unhandled nmis, two
events could trigger 'simultaneously' raising two back-to-back
NMIs. If the first NMI handles both, the latter will be empty
and daze the CPU.
The solution to avoid an 'unknown nmi' massage in this case was
simply to stop the nmi handler chain when the PMU is enabled by
stating the nmi was handled. This has the drawback that a) we
can not detect unknown nmis anymore, and b) subsequent nmi
handlers are not called.
This patch addresses this. Now, we check this unknown NMI if it
could be a PMU back-to-back NMI. Otherwise we pass it and let
the kernel handle the unknown nmi.
This is a debug log:
cpu #6, nmi #32333, skip_nmi #32330, handled = 1, time = 1934364430
cpu #6, nmi #32334, skip_nmi #32330, handled = 1, time = 1934704616
cpu #6, nmi #32335, skip_nmi #32336, handled = 2, time = 1936032320
cpu #6, nmi #32336, skip_nmi #32336, handled = 0, time = 1936034139
cpu #6, nmi #32337, skip_nmi #32336, handled = 1, time = 1936120100
cpu #6, nmi #32338, skip_nmi #32336, handled = 1, time = 1936404607
cpu #6, nmi #32339, skip_nmi #32336, handled = 1, time = 1937983416
cpu #6, nmi #32340, skip_nmi #32341, handled = 2, time = 1938201032
cpu #6, nmi #32341, skip_nmi #32341, handled = 0, time = 1938202830
cpu #6, nmi #32342, skip_nmi #32341, handled = 1, time = 1938443743
cpu #6, nmi #32343, skip_nmi #32341, handled = 1, time = 1939956552
cpu #6, nmi #32344, skip_nmi #32341, handled = 1, time = 1940073224
cpu #6, nmi #32345, skip_nmi #32341, handled = 1, time = 1940485677
cpu #6, nmi #32346, skip_nmi #32347, handled = 2, time = 1941947772
cpu #6, nmi #32347, skip_nmi #32347, handled = 1, time = 1941949818
cpu #6, nmi #32348, skip_nmi #32347, handled = 0, time = 1941951591
Uhhuh. NMI received for unknown reason 00 on CPU 6.
Do you have a strange power saving mode enabled?
Dazed and confused, but trying to continue
Deltas:
nmi #32334 340186
nmi #32335 1327704
nmi #32336 1819 <<<< back-to-back nmi [1]
nmi #32337 85961
nmi #32338 284507
nmi #32339 1578809
nmi #32340 217616
nmi #32341 1798 <<<< back-to-back nmi [2]
nmi #32342 240913
nmi #32343 1512809
nmi #32344 116672
nmi #32345 412453
nmi #32346 1462095 <<<< 1st nmi (standard) handling 2 counters
nmi #32347 2046 <<<< 2nd nmi (back-to-back) handling one
counter nmi #32348 1773 <<<< 3rd nmi (back-to-back)
handling no counter! [3]
For back-to-back nmi detection there are the following rules:
The PMU nmi handler was handling more than one counter and no
counter was handled in the subsequent nmi (see [1] and [2]
above).
There is another case if there are two subsequent back-to-back
nmis [3]. The 2nd is detected as back-to-back because the first
handled more than one counter. If the second handles one counter
and the 3rd handles nothing, we drop the 3rd nmi because it
could be a back-to-back nmi.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
[ renamed nmi variable to pmu_nmi to avoid clash with .nmi in entry.S ]
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: fweisbec@gmail.com
Cc: ying.huang@intel.com
Cc: ming.m.lin@intel.com
Cc: eranian@google.com
LKML-Reference: <1283454469-1909-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-09-03 03:07:48 +08:00
|
|
|
/* let the kernel handle the unknown nmi */
|
|
|
|
return NOTIFY_DONE;
|
|
|
|
/*
|
|
|
|
* This one is a PMU back-to-back nmi. Two events
|
|
|
|
* trigger 'simultaneously' raising two back-to-back
|
|
|
|
* NMIs. If the first NMI handles both, the latter
|
|
|
|
* will be empty and daze the CPU. So, we drop it to
|
|
|
|
* avoid false-positive 'unknown nmi' messages.
|
|
|
|
*/
|
|
|
|
return NOTIFY_STOP;
|
2009-03-06 01:08:27 +08:00
|
|
|
default:
|
2008-12-03 17:39:53 +08:00
|
|
|
return NOTIFY_DONE;
|
2009-03-06 01:08:27 +08:00
|
|
|
}
|
2008-12-03 17:39:53 +08:00
|
|
|
|
perf, x86: Try to handle unknown nmis with an enabled PMU
When the PMU is enabled it is valid to have unhandled nmis, two
events could trigger 'simultaneously' raising two back-to-back
NMIs. If the first NMI handles both, the latter will be empty
and daze the CPU.
The solution to avoid an 'unknown nmi' massage in this case was
simply to stop the nmi handler chain when the PMU is enabled by
stating the nmi was handled. This has the drawback that a) we
can not detect unknown nmis anymore, and b) subsequent nmi
handlers are not called.
This patch addresses this. Now, we check this unknown NMI if it
could be a PMU back-to-back NMI. Otherwise we pass it and let
the kernel handle the unknown nmi.
This is a debug log:
cpu #6, nmi #32333, skip_nmi #32330, handled = 1, time = 1934364430
cpu #6, nmi #32334, skip_nmi #32330, handled = 1, time = 1934704616
cpu #6, nmi #32335, skip_nmi #32336, handled = 2, time = 1936032320
cpu #6, nmi #32336, skip_nmi #32336, handled = 0, time = 1936034139
cpu #6, nmi #32337, skip_nmi #32336, handled = 1, time = 1936120100
cpu #6, nmi #32338, skip_nmi #32336, handled = 1, time = 1936404607
cpu #6, nmi #32339, skip_nmi #32336, handled = 1, time = 1937983416
cpu #6, nmi #32340, skip_nmi #32341, handled = 2, time = 1938201032
cpu #6, nmi #32341, skip_nmi #32341, handled = 0, time = 1938202830
cpu #6, nmi #32342, skip_nmi #32341, handled = 1, time = 1938443743
cpu #6, nmi #32343, skip_nmi #32341, handled = 1, time = 1939956552
cpu #6, nmi #32344, skip_nmi #32341, handled = 1, time = 1940073224
cpu #6, nmi #32345, skip_nmi #32341, handled = 1, time = 1940485677
cpu #6, nmi #32346, skip_nmi #32347, handled = 2, time = 1941947772
cpu #6, nmi #32347, skip_nmi #32347, handled = 1, time = 1941949818
cpu #6, nmi #32348, skip_nmi #32347, handled = 0, time = 1941951591
Uhhuh. NMI received for unknown reason 00 on CPU 6.
Do you have a strange power saving mode enabled?
Dazed and confused, but trying to continue
Deltas:
nmi #32334 340186
nmi #32335 1327704
nmi #32336 1819 <<<< back-to-back nmi [1]
nmi #32337 85961
nmi #32338 284507
nmi #32339 1578809
nmi #32340 217616
nmi #32341 1798 <<<< back-to-back nmi [2]
nmi #32342 240913
nmi #32343 1512809
nmi #32344 116672
nmi #32345 412453
nmi #32346 1462095 <<<< 1st nmi (standard) handling 2 counters
nmi #32347 2046 <<<< 2nd nmi (back-to-back) handling one
counter nmi #32348 1773 <<<< 3rd nmi (back-to-back)
handling no counter! [3]
For back-to-back nmi detection there are the following rules:
The PMU nmi handler was handling more than one counter and no
counter was handled in the subsequent nmi (see [1] and [2]
above).
There is another case if there are two subsequent back-to-back
nmis [3]. The 2nd is detected as back-to-back because the first
handled more than one counter. If the second handles one counter
and the 3rd handles nothing, we drop the 3rd nmi because it
could be a back-to-back nmi.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
[ renamed nmi variable to pmu_nmi to avoid clash with .nmi in entry.S ]
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: fweisbec@gmail.com
Cc: ying.huang@intel.com
Cc: ming.m.lin@intel.com
Cc: eranian@google.com
LKML-Reference: <1283454469-1909-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-09-03 03:07:48 +08:00
|
|
|
handled = x86_pmu.handle_irq(args->regs);
|
|
|
|
if (!handled)
|
|
|
|
return NOTIFY_DONE;
|
|
|
|
|
|
|
|
this_nmi = percpu_read(irq_stat.__nmi_count);
|
|
|
|
if ((handled > 1) ||
|
|
|
|
/* the next nmi could be a back-to-back nmi */
|
2010-12-18 23:28:55 +08:00
|
|
|
((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
|
|
|
|
(__this_cpu_read(pmu_nmi.handled) > 1))) {
|
perf, x86: Try to handle unknown nmis with an enabled PMU
When the PMU is enabled it is valid to have unhandled nmis, two
events could trigger 'simultaneously' raising two back-to-back
NMIs. If the first NMI handles both, the latter will be empty
and daze the CPU.
The solution to avoid an 'unknown nmi' massage in this case was
simply to stop the nmi handler chain when the PMU is enabled by
stating the nmi was handled. This has the drawback that a) we
can not detect unknown nmis anymore, and b) subsequent nmi
handlers are not called.
This patch addresses this. Now, we check this unknown NMI if it
could be a PMU back-to-back NMI. Otherwise we pass it and let
the kernel handle the unknown nmi.
This is a debug log:
cpu #6, nmi #32333, skip_nmi #32330, handled = 1, time = 1934364430
cpu #6, nmi #32334, skip_nmi #32330, handled = 1, time = 1934704616
cpu #6, nmi #32335, skip_nmi #32336, handled = 2, time = 1936032320
cpu #6, nmi #32336, skip_nmi #32336, handled = 0, time = 1936034139
cpu #6, nmi #32337, skip_nmi #32336, handled = 1, time = 1936120100
cpu #6, nmi #32338, skip_nmi #32336, handled = 1, time = 1936404607
cpu #6, nmi #32339, skip_nmi #32336, handled = 1, time = 1937983416
cpu #6, nmi #32340, skip_nmi #32341, handled = 2, time = 1938201032
cpu #6, nmi #32341, skip_nmi #32341, handled = 0, time = 1938202830
cpu #6, nmi #32342, skip_nmi #32341, handled = 1, time = 1938443743
cpu #6, nmi #32343, skip_nmi #32341, handled = 1, time = 1939956552
cpu #6, nmi #32344, skip_nmi #32341, handled = 1, time = 1940073224
cpu #6, nmi #32345, skip_nmi #32341, handled = 1, time = 1940485677
cpu #6, nmi #32346, skip_nmi #32347, handled = 2, time = 1941947772
cpu #6, nmi #32347, skip_nmi #32347, handled = 1, time = 1941949818
cpu #6, nmi #32348, skip_nmi #32347, handled = 0, time = 1941951591
Uhhuh. NMI received for unknown reason 00 on CPU 6.
Do you have a strange power saving mode enabled?
Dazed and confused, but trying to continue
Deltas:
nmi #32334 340186
nmi #32335 1327704
nmi #32336 1819 <<<< back-to-back nmi [1]
nmi #32337 85961
nmi #32338 284507
nmi #32339 1578809
nmi #32340 217616
nmi #32341 1798 <<<< back-to-back nmi [2]
nmi #32342 240913
nmi #32343 1512809
nmi #32344 116672
nmi #32345 412453
nmi #32346 1462095 <<<< 1st nmi (standard) handling 2 counters
nmi #32347 2046 <<<< 2nd nmi (back-to-back) handling one
counter nmi #32348 1773 <<<< 3rd nmi (back-to-back)
handling no counter! [3]
For back-to-back nmi detection there are the following rules:
The PMU nmi handler was handling more than one counter and no
counter was handled in the subsequent nmi (see [1] and [2]
above).
There is another case if there are two subsequent back-to-back
nmis [3]. The 2nd is detected as back-to-back because the first
handled more than one counter. If the second handles one counter
and the 3rd handles nothing, we drop the 3rd nmi because it
could be a back-to-back nmi.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
[ renamed nmi variable to pmu_nmi to avoid clash with .nmi in entry.S ]
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: fweisbec@gmail.com
Cc: ying.huang@intel.com
Cc: ming.m.lin@intel.com
Cc: eranian@google.com
LKML-Reference: <1283454469-1909-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-09-03 03:07:48 +08:00
|
|
|
/*
|
|
|
|
* We could have two subsequent back-to-back nmis: The
|
|
|
|
* first handles more than one counter, the 2nd
|
|
|
|
* handles only one counter and the 3rd handles no
|
|
|
|
* counter.
|
|
|
|
*
|
|
|
|
* This is the 2nd nmi because the previous was
|
|
|
|
* handling more than one counter. We will mark the
|
|
|
|
* next (3rd) and then drop it if unhandled.
|
|
|
|
*/
|
2010-12-18 23:28:55 +08:00
|
|
|
__this_cpu_write(pmu_nmi.marked, this_nmi + 1);
|
|
|
|
__this_cpu_write(pmu_nmi.handled, handled);
|
perf, x86: Try to handle unknown nmis with an enabled PMU
When the PMU is enabled it is valid to have unhandled nmis, two
events could trigger 'simultaneously' raising two back-to-back
NMIs. If the first NMI handles both, the latter will be empty
and daze the CPU.
The solution to avoid an 'unknown nmi' massage in this case was
simply to stop the nmi handler chain when the PMU is enabled by
stating the nmi was handled. This has the drawback that a) we
can not detect unknown nmis anymore, and b) subsequent nmi
handlers are not called.
This patch addresses this. Now, we check this unknown NMI if it
could be a PMU back-to-back NMI. Otherwise we pass it and let
the kernel handle the unknown nmi.
This is a debug log:
cpu #6, nmi #32333, skip_nmi #32330, handled = 1, time = 1934364430
cpu #6, nmi #32334, skip_nmi #32330, handled = 1, time = 1934704616
cpu #6, nmi #32335, skip_nmi #32336, handled = 2, time = 1936032320
cpu #6, nmi #32336, skip_nmi #32336, handled = 0, time = 1936034139
cpu #6, nmi #32337, skip_nmi #32336, handled = 1, time = 1936120100
cpu #6, nmi #32338, skip_nmi #32336, handled = 1, time = 1936404607
cpu #6, nmi #32339, skip_nmi #32336, handled = 1, time = 1937983416
cpu #6, nmi #32340, skip_nmi #32341, handled = 2, time = 1938201032
cpu #6, nmi #32341, skip_nmi #32341, handled = 0, time = 1938202830
cpu #6, nmi #32342, skip_nmi #32341, handled = 1, time = 1938443743
cpu #6, nmi #32343, skip_nmi #32341, handled = 1, time = 1939956552
cpu #6, nmi #32344, skip_nmi #32341, handled = 1, time = 1940073224
cpu #6, nmi #32345, skip_nmi #32341, handled = 1, time = 1940485677
cpu #6, nmi #32346, skip_nmi #32347, handled = 2, time = 1941947772
cpu #6, nmi #32347, skip_nmi #32347, handled = 1, time = 1941949818
cpu #6, nmi #32348, skip_nmi #32347, handled = 0, time = 1941951591
Uhhuh. NMI received for unknown reason 00 on CPU 6.
Do you have a strange power saving mode enabled?
Dazed and confused, but trying to continue
Deltas:
nmi #32334 340186
nmi #32335 1327704
nmi #32336 1819 <<<< back-to-back nmi [1]
nmi #32337 85961
nmi #32338 284507
nmi #32339 1578809
nmi #32340 217616
nmi #32341 1798 <<<< back-to-back nmi [2]
nmi #32342 240913
nmi #32343 1512809
nmi #32344 116672
nmi #32345 412453
nmi #32346 1462095 <<<< 1st nmi (standard) handling 2 counters
nmi #32347 2046 <<<< 2nd nmi (back-to-back) handling one
counter nmi #32348 1773 <<<< 3rd nmi (back-to-back)
handling no counter! [3]
For back-to-back nmi detection there are the following rules:
The PMU nmi handler was handling more than one counter and no
counter was handled in the subsequent nmi (see [1] and [2]
above).
There is another case if there are two subsequent back-to-back
nmis [3]. The 2nd is detected as back-to-back because the first
handled more than one counter. If the second handles one counter
and the 3rd handles nothing, we drop the 3rd nmi because it
could be a back-to-back nmi.
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
[ renamed nmi variable to pmu_nmi to avoid clash with .nmi in entry.S ]
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: peterz@infradead.org
Cc: gorcunov@gmail.com
Cc: fweisbec@gmail.com
Cc: ying.huang@intel.com
Cc: ming.m.lin@intel.com
Cc: eranian@google.com
LKML-Reference: <1283454469-1909-3-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-09-03 03:07:48 +08:00
|
|
|
}
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2009-05-14 20:52:17 +08:00
|
|
|
return NOTIFY_STOP;
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
|
|
|
|
2010-02-26 19:05:05 +08:00
|
|
|
static __read_mostly struct notifier_block perf_event_nmi_notifier = {
|
|
|
|
.notifier_call = perf_event_nmi_handler,
|
|
|
|
.next = NULL,
|
2011-01-07 05:18:49 +08:00
|
|
|
.priority = NMI_LOCAL_LOW_PRIOR,
|
2010-02-26 19:05:05 +08:00
|
|
|
};
|
|
|
|
|
2010-01-22 23:32:17 +08:00
|
|
|
static struct event_constraint unconstrained;
|
2010-02-08 23:17:01 +08:00
|
|
|
static struct event_constraint emptyconstraint;
|
2010-01-22 23:32:17 +08:00
|
|
|
|
|
|
|
static struct event_constraint *
|
2010-02-26 19:05:05 +08:00
|
|
|
x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
|
2010-01-18 16:58:01 +08:00
|
|
|
{
|
2010-01-22 23:32:17 +08:00
|
|
|
struct event_constraint *c;
|
2010-01-18 16:58:01 +08:00
|
|
|
|
|
|
|
if (x86_pmu.event_constraints) {
|
|
|
|
for_each_event_constraint(c, x86_pmu.event_constraints) {
|
2010-01-22 23:32:17 +08:00
|
|
|
if ((event->hw.config & c->cmask) == c->code)
|
|
|
|
return c;
|
2010-01-18 16:58:01 +08:00
|
|
|
}
|
|
|
|
}
|
2010-01-22 23:32:17 +08:00
|
|
|
|
|
|
|
return &unconstrained;
|
2010-01-18 16:58:01 +08:00
|
|
|
}
|
|
|
|
|
2010-02-26 19:05:05 +08:00
|
|
|
#include "perf_event_amd.c"
|
|
|
|
#include "perf_event_p6.c"
|
2010-03-12 00:54:39 +08:00
|
|
|
#include "perf_event_p4.c"
|
2010-03-03 19:02:30 +08:00
|
|
|
#include "perf_event_intel_lbr.c"
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
#include "perf_event_intel_ds.c"
|
2010-02-26 19:05:05 +08:00
|
|
|
#include "perf_event_intel.c"
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
|
2010-03-05 20:01:18 +08:00
|
|
|
static int __cpuinit
|
|
|
|
x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
|
|
|
|
{
|
|
|
|
unsigned int cpu = (long)hcpu;
|
2010-03-24 02:31:15 +08:00
|
|
|
int ret = NOTIFY_OK;
|
2010-03-05 20:01:18 +08:00
|
|
|
|
|
|
|
switch (action & ~CPU_TASKS_FROZEN) {
|
|
|
|
case CPU_UP_PREPARE:
|
|
|
|
if (x86_pmu.cpu_prepare)
|
2010-03-24 02:31:15 +08:00
|
|
|
ret = x86_pmu.cpu_prepare(cpu);
|
2010-03-05 20:01:18 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
case CPU_STARTING:
|
|
|
|
if (x86_pmu.cpu_starting)
|
|
|
|
x86_pmu.cpu_starting(cpu);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CPU_DYING:
|
|
|
|
if (x86_pmu.cpu_dying)
|
|
|
|
x86_pmu.cpu_dying(cpu);
|
|
|
|
break;
|
|
|
|
|
2010-03-24 02:31:15 +08:00
|
|
|
case CPU_UP_CANCELED:
|
2010-03-05 20:01:18 +08:00
|
|
|
case CPU_DEAD:
|
|
|
|
if (x86_pmu.cpu_dead)
|
|
|
|
x86_pmu.cpu_dead(cpu);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2010-03-24 02:31:15 +08:00
|
|
|
return ret;
|
2010-03-05 20:01:18 +08:00
|
|
|
}
|
|
|
|
|
2009-12-11 00:56:34 +08:00
|
|
|
static void __init pmu_check_apic(void)
|
|
|
|
{
|
|
|
|
if (cpu_has_apic)
|
|
|
|
return;
|
|
|
|
|
|
|
|
x86_pmu.apic = 0;
|
|
|
|
pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
|
|
|
|
pr_info("no hardware sampling interrupt available.\n");
|
|
|
|
}
|
|
|
|
|
2011-01-22 07:30:01 +08:00
|
|
|
static int __init init_hw_perf_events(void)
|
2009-02-27 20:39:09 +08:00
|
|
|
{
|
2010-02-01 22:36:30 +08:00
|
|
|
struct event_constraint *c;
|
2009-04-29 18:47:10 +08:00
|
|
|
int err;
|
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
pr_info("Performance Events: ");
|
2009-05-29 17:25:09 +08:00
|
|
|
|
2009-02-27 20:39:09 +08:00
|
|
|
switch (boot_cpu_data.x86_vendor) {
|
|
|
|
case X86_VENDOR_INTEL:
|
2009-04-29 18:47:10 +08:00
|
|
|
err = intel_pmu_init();
|
2009-02-27 20:39:09 +08:00
|
|
|
break;
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
case X86_VENDOR_AMD:
|
2009-04-29 18:47:10 +08:00
|
|
|
err = amd_pmu_init();
|
x86: AMD Support for perf_counter
Supported basic performance counter for AMD K7 and later:
$ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null
Performance counter stats for 'ls':
12.298610 task clock ticks (msecs)
3298477 CPU cycles (events)
1406354 instructions (events)
749035 cache references (events)
16939 cache misses (events)
100589 branches (events)
11159 branch misses (events)
7.627540 cpu clock ticks (msecs)
12.298610 task clock ticks (msecs)
500 pagefaults (events)
6 context switches (events)
3 CPU migrations (events)
Wall-clock time elapsed: 8.672290 msecs
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-02-27 22:45:14 +08:00
|
|
|
break;
|
2009-04-29 18:47:00 +08:00
|
|
|
default:
|
2010-11-26 01:38:29 +08:00
|
|
|
return 0;
|
2009-02-27 20:39:09 +08:00
|
|
|
}
|
2009-05-29 17:25:09 +08:00
|
|
|
if (err != 0) {
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
pr_cont("no PMU driver, software events only.\n");
|
2010-11-26 01:38:29 +08:00
|
|
|
return 0;
|
2009-05-29 17:25:09 +08:00
|
|
|
}
|
2009-02-27 20:39:09 +08:00
|
|
|
|
2009-12-11 00:56:34 +08:00
|
|
|
pmu_check_apic();
|
|
|
|
|
2010-11-23 05:55:23 +08:00
|
|
|
/* sanity check that the hardware exists or is emulated */
|
2010-12-08 22:56:23 +08:00
|
|
|
if (!check_hw_exists())
|
2010-11-26 01:38:29 +08:00
|
|
|
return 0;
|
2010-11-23 05:55:23 +08:00
|
|
|
|
2009-05-29 17:25:09 +08:00
|
|
|
pr_cont("%s PMU driver.\n", x86_pmu.name);
|
2009-04-29 18:47:13 +08:00
|
|
|
|
2010-03-05 04:49:01 +08:00
|
|
|
if (x86_pmu.quirks)
|
|
|
|
x86_pmu.quirks();
|
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
|
2010-03-30 00:36:50 +08:00
|
|
|
x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
|
|
|
|
x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
2010-03-30 00:36:50 +08:00
|
|
|
x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
|
2010-03-30 00:36:50 +08:00
|
|
|
x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
|
|
|
|
x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
|
2008-12-17 17:51:15 +08:00
|
|
|
}
|
2008-12-17 20:09:20 +08:00
|
|
|
|
2010-03-17 19:49:13 +08:00
|
|
|
x86_pmu.intel_ctrl |=
|
2010-03-30 00:36:50 +08:00
|
|
|
((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
|
2008-12-03 17:39:53 +08:00
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
perf_events_lapic_init();
|
|
|
|
register_die_notifier(&perf_event_nmi_notifier);
|
2009-05-29 17:25:09 +08:00
|
|
|
|
2010-01-22 23:32:17 +08:00
|
|
|
unconstrained = (struct event_constraint)
|
2010-03-30 00:36:50 +08:00
|
|
|
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
|
|
|
|
0, x86_pmu.num_counters);
|
2010-01-22 23:32:17 +08:00
|
|
|
|
2010-02-01 22:36:30 +08:00
|
|
|
if (x86_pmu.event_constraints) {
|
|
|
|
for_each_event_constraint(c, x86_pmu.event_constraints) {
|
2010-03-30 17:28:21 +08:00
|
|
|
if (c->cmask != X86_RAW_EVENT_MASK)
|
2010-02-01 22:36:30 +08:00
|
|
|
continue;
|
|
|
|
|
2010-03-30 00:36:50 +08:00
|
|
|
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
|
|
|
|
c->weight += x86_pmu.num_counters;
|
2010-02-01 22:36:30 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-09-21 18:20:38 +08:00
|
|
|
pr_info("... version: %d\n", x86_pmu.version);
|
2010-03-30 00:36:50 +08:00
|
|
|
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
|
|
|
|
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
|
|
|
|
pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
|
2009-09-21 18:20:38 +08:00
|
|
|
pr_info("... max period: %016Lx\n", x86_pmu.max_period);
|
2010-03-30 00:36:50 +08:00
|
|
|
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
|
2010-03-17 19:49:13 +08:00
|
|
|
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
|
2010-03-05 20:01:18 +08:00
|
|
|
|
2010-11-18 06:17:36 +08:00
|
|
|
perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
|
2010-03-05 20:01:18 +08:00
|
|
|
perf_cpu_notifier(x86_pmu_notifier);
|
2010-11-26 01:38:29 +08:00
|
|
|
|
|
|
|
return 0;
|
2008-12-03 17:39:53 +08:00
|
|
|
}
|
2010-11-26 01:38:29 +08:00
|
|
|
early_initcall(init_hw_perf_events);
|
2008-12-11 19:46:46 +08:00
|
|
|
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
static inline void x86_pmu_read(struct perf_event *event)
|
2008-12-13 16:00:03 +08:00
|
|
|
{
|
2010-03-03 03:18:39 +08:00
|
|
|
x86_perf_event_update(event);
|
2008-12-13 16:00:03 +08:00
|
|
|
}
|
|
|
|
|
2010-04-23 13:56:12 +08:00
|
|
|
/*
|
|
|
|
* Start group events scheduling transaction
|
|
|
|
* Set the flag to make pmu::enable() not perform the
|
|
|
|
* schedulability test, it will be performed at commit time
|
|
|
|
*/
|
2010-06-11 19:35:57 +08:00
|
|
|
static void x86_pmu_start_txn(struct pmu *pmu)
|
2010-04-23 13:56:12 +08:00
|
|
|
{
|
2010-06-14 14:49:00 +08:00
|
|
|
perf_pmu_disable(pmu);
|
2010-12-18 23:28:55 +08:00
|
|
|
__this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
|
|
|
|
__this_cpu_write(cpu_hw_events.n_txn, 0);
|
2010-04-23 13:56:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Stop group events scheduling transaction
|
|
|
|
* Clear the flag and pmu::enable() will perform the
|
|
|
|
* schedulability test.
|
|
|
|
*/
|
2010-06-11 19:35:57 +08:00
|
|
|
static void x86_pmu_cancel_txn(struct pmu *pmu)
|
2010-04-23 13:56:12 +08:00
|
|
|
{
|
2010-12-18 23:28:55 +08:00
|
|
|
__this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
|
perf_events: Fix event scheduling issues introduced by transactional API
The transactional API patch between the generic and model-specific
code introduced several important bugs with event scheduling, at
least on X86. If you had pinned events, e.g., watchdog, and were
over-committing the PMU, you would get bogus counts. The bug was
showing up on Intel CPU because events would move around more
often that on AMD. But the problem also existed on AMD, though
harder to expose.
The issues were:
- group_sched_in() was missing a cancel_txn() in the error path
- cpuc->n_added was not properly maintained, leading to missing
actions in hw_perf_enable(), i.e., n_running being 0. You cannot
update n_added until you know the transaction has succeeded. In
case of failed transaction n_added was not adjusted back.
- in case of failed transactions, event_sched_out() was called
and eventually invoked x86_disable_event() to touch the HW reg.
But with transactions, on X86, event_sched_in() does not touch
HW registers, it simply collects events into a list. Thus, you
could end up calling x86_disable_event() on a counter which
did not correspond to the current event when idx != -1.
The patch modifies the generic and X86 code to avoid all those problems.
First, we keep track of the number of events added last. In case the
transaction fails, we substract them from n_added. This approach is
necessary (as opposed to delaying updates to n_added) because not all
event updates use the transaction API, e.g., single events.
Second, we encapsulate the event_sched_in() and event_sched_out() in
group_sched_in() inside the transaction. That makes the operations
symmetrical and you can also detect that you are inside a transaction
and skip the HW reg access by checking cpuc->group_flag.
With this patch, you can now overcommit the PMU even with pinned
system-wide events present and still get valid counts.
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1274796225.5882.1389.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-05-25 22:23:10 +08:00
|
|
|
/*
|
|
|
|
* Truncate the collected events.
|
|
|
|
*/
|
2010-12-18 23:28:55 +08:00
|
|
|
__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
|
|
|
|
__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
|
2010-06-14 14:49:00 +08:00
|
|
|
perf_pmu_enable(pmu);
|
2010-04-23 13:56:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Commit group events scheduling transaction
|
|
|
|
* Perform the group schedulability test as a whole
|
|
|
|
* Return 0 if success
|
|
|
|
*/
|
2010-06-11 19:35:57 +08:00
|
|
|
static int x86_pmu_commit_txn(struct pmu *pmu)
|
2010-04-23 13:56:12 +08:00
|
|
|
{
|
|
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
int assign[X86_PMC_IDX_MAX];
|
|
|
|
int n, ret;
|
|
|
|
|
|
|
|
n = cpuc->n_events;
|
|
|
|
|
|
|
|
if (!x86_pmu_initialized())
|
|
|
|
return -EAGAIN;
|
|
|
|
|
|
|
|
ret = x86_pmu.schedule_events(cpuc, n, assign);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* copy new assignment, now we know it is possible
|
|
|
|
* will be used by hw_perf_enable()
|
|
|
|
*/
|
|
|
|
memcpy(cpuc->assign, assign, n*sizeof(int));
|
|
|
|
|
2010-05-25 23:49:05 +08:00
|
|
|
cpuc->group_flag &= ~PERF_EVENT_TXN;
|
2010-06-14 14:49:00 +08:00
|
|
|
perf_pmu_enable(pmu);
|
2010-04-23 13:56:12 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
/*
|
|
|
|
* validate that we can schedule this event
|
|
|
|
*/
|
|
|
|
static int validate_event(struct perf_event *event)
|
|
|
|
{
|
|
|
|
struct cpu_hw_events *fake_cpuc;
|
|
|
|
struct event_constraint *c;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
|
|
|
|
if (!fake_cpuc)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
c = x86_pmu.get_event_constraints(fake_cpuc, event);
|
|
|
|
|
|
|
|
if (!c || !c->weight)
|
|
|
|
ret = -ENOSPC;
|
|
|
|
|
|
|
|
if (x86_pmu.put_event_constraints)
|
|
|
|
x86_pmu.put_event_constraints(fake_cpuc, event);
|
|
|
|
|
|
|
|
kfree(fake_cpuc);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2010-01-18 16:58:01 +08:00
|
|
|
/*
|
|
|
|
* validate a single event group
|
|
|
|
*
|
|
|
|
* validation include:
|
2010-01-27 15:39:39 +08:00
|
|
|
* - check events are compatible which each other
|
|
|
|
* - events do not compete for the same counter
|
|
|
|
* - number of events <= number of counters
|
2010-01-18 16:58:01 +08:00
|
|
|
*
|
|
|
|
* validation ensures the group can be loaded onto the
|
|
|
|
* PMU if it was the only group available.
|
|
|
|
*/
|
2009-10-08 17:56:07 +08:00
|
|
|
static int validate_group(struct perf_event *event)
|
|
|
|
{
|
2010-01-18 16:58:01 +08:00
|
|
|
struct perf_event *leader = event->group_leader;
|
2010-01-22 21:35:46 +08:00
|
|
|
struct cpu_hw_events *fake_cpuc;
|
|
|
|
int ret, n;
|
2009-10-08 17:56:07 +08:00
|
|
|
|
2010-01-22 21:35:46 +08:00
|
|
|
ret = -ENOMEM;
|
|
|
|
fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
|
|
|
|
if (!fake_cpuc)
|
|
|
|
goto out;
|
2009-10-08 17:56:07 +08:00
|
|
|
|
2010-01-18 16:58:01 +08:00
|
|
|
/*
|
|
|
|
* the event is not yet connected with its
|
|
|
|
* siblings therefore we must first collect
|
|
|
|
* existing siblings, then add the new event
|
|
|
|
* before we can simulate the scheduling
|
|
|
|
*/
|
2010-01-22 21:35:46 +08:00
|
|
|
ret = -ENOSPC;
|
|
|
|
n = collect_events(fake_cpuc, leader, true);
|
2010-01-18 16:58:01 +08:00
|
|
|
if (n < 0)
|
2010-01-22 21:35:46 +08:00
|
|
|
goto out_free;
|
2009-10-08 17:56:07 +08:00
|
|
|
|
2010-01-22 21:35:46 +08:00
|
|
|
fake_cpuc->n_events = n;
|
|
|
|
n = collect_events(fake_cpuc, event, false);
|
2010-01-18 16:58:01 +08:00
|
|
|
if (n < 0)
|
2010-01-22 21:35:46 +08:00
|
|
|
goto out_free;
|
2009-10-08 17:56:07 +08:00
|
|
|
|
2010-01-22 21:35:46 +08:00
|
|
|
fake_cpuc->n_events = n;
|
2010-01-18 16:58:01 +08:00
|
|
|
|
2010-03-12 00:54:39 +08:00
|
|
|
ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
|
2010-01-22 21:35:46 +08:00
|
|
|
|
|
|
|
out_free:
|
|
|
|
kfree(fake_cpuc);
|
|
|
|
out:
|
|
|
|
return ret;
|
2009-10-08 17:56:07 +08:00
|
|
|
}
|
|
|
|
|
2011-01-22 07:30:01 +08:00
|
|
|
static int x86_pmu_event_init(struct perf_event *event)
|
2008-12-11 19:46:46 +08:00
|
|
|
{
|
2010-06-11 19:35:57 +08:00
|
|
|
struct pmu *tmp;
|
2008-12-11 19:46:46 +08:00
|
|
|
int err;
|
|
|
|
|
2010-06-11 19:35:08 +08:00
|
|
|
switch (event->attr.type) {
|
|
|
|
case PERF_TYPE_RAW:
|
|
|
|
case PERF_TYPE_HARDWARE:
|
|
|
|
case PERF_TYPE_HW_CACHE:
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = __x86_pmu_event_init(event);
|
2009-10-08 17:56:07 +08:00
|
|
|
if (!err) {
|
2010-01-21 23:39:01 +08:00
|
|
|
/*
|
|
|
|
* we temporarily connect event to its pmu
|
|
|
|
* such that validate_group() can classify
|
|
|
|
* it as an x86 event using is_x86_event()
|
|
|
|
*/
|
|
|
|
tmp = event->pmu;
|
|
|
|
event->pmu = &pmu;
|
|
|
|
|
2009-10-08 17:56:07 +08:00
|
|
|
if (event->group_leader != event)
|
|
|
|
err = validate_group(event);
|
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-03-03 02:52:12 +08:00
|
|
|
else
|
|
|
|
err = validate_event(event);
|
2010-01-21 23:39:01 +08:00
|
|
|
|
|
|
|
event->pmu = tmp;
|
2009-10-08 17:56:07 +08:00
|
|
|
}
|
2009-09-09 16:04:47 +08:00
|
|
|
if (err) {
|
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!
In the past few months the perfcounters subsystem has grown out its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring, analysis facility.
Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support the 'counter' name is less and
less appropriate.
All in one, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables
and API names. (in an ABI compatible fashion)
The word 'event' is also a bit shorter than 'counter' - which makes
it slightly more convenient to write/handle as well.
Thanks goes to Stephane Eranian who first observed this misnomer and
suggested a rename.
User-space tooling and ABI compatibility is not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)
This patch has been generated via the following script:
FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')
sed -i \
-e 's/PERF_EVENT_/PERF_RECORD_/g' \
-e 's/PERF_COUNTER/PERF_EVENT/g' \
-e 's/perf_counter/perf_event/g' \
-e 's/nb_counters/nb_events/g' \
-e 's/swcounter/swevent/g' \
-e 's/tpcounter_event/tp_event/g' \
$FILES
for N in $(find . -name perf_counter.[ch]); do
M=$(echo $N | sed 's/perf_counter/perf_event/g')
mv $N $M
done
FILES=$(find . -name perf_event.*)
sed -i \
-e 's/COUNTER_MASK/REG_MASK/g' \
-e 's/COUNTER/EVENT/g' \
-e 's/\<event\>/event_id/g' \
-e 's/counter/event/g' \
-e 's/Counter/Event/g' \
$FILES
... to keep it as correct as possible. This script can also be
used by anyone who has pending perfcounters patches - it converts
a Linux kernel tree over to the new naming. We tried to time this
change to the point in time where the amount of pending patches
is the smallest: the end of the merge window.
Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.
( NOTE: 'counters' are still the proper terminology when we deal
with hardware registers - and these sed scripts are a bit
over-eager in renaming them. I've undone some of that, but
in case there's something left where 'counter' would be
better than 'event' we can undo that on an individual basis
instead of touching an otherwise nicely automated patch. )
Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
|
|
|
if (event->destroy)
|
|
|
|
event->destroy(event);
|
2009-09-09 16:04:47 +08:00
|
|
|
}
|
2008-12-11 19:46:46 +08:00
|
|
|
|
2010-06-11 19:35:08 +08:00
|
|
|
return err;
|
2008-12-11 19:46:46 +08:00
|
|
|
}
|
2009-03-31 01:07:15 +08:00
|
|
|
|
2010-06-11 19:35:08 +08:00
|
|
|
static struct pmu pmu = {
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
.pmu_enable = x86_pmu_enable,
|
|
|
|
.pmu_disable = x86_pmu_disable,
|
|
|
|
|
2010-06-11 19:35:08 +08:00
|
|
|
.event_init = x86_pmu_event_init,
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
|
|
|
|
.add = x86_pmu_add,
|
|
|
|
.del = x86_pmu_del,
|
2010-06-11 19:35:08 +08:00
|
|
|
.start = x86_pmu_start,
|
|
|
|
.stop = x86_pmu_stop,
|
|
|
|
.read = x86_pmu_read,
|
perf: Rework the PMU methods
Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.
The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.
This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).
It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).
The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:
1) We disable the counter:
a) the pmu has per-counter enable bits, we flip that
b) we program a NOP event, preserving the counter state
2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-06-16 20:37:10 +08:00
|
|
|
|
2010-06-11 19:35:08 +08:00
|
|
|
.start_txn = x86_pmu_start_txn,
|
|
|
|
.cancel_txn = x86_pmu_cancel_txn,
|
|
|
|
.commit_txn = x86_pmu_commit_txn,
|
|
|
|
};
|
|
|
|
|
2009-03-31 01:07:15 +08:00
|
|
|
/*
|
|
|
|
* callchain support
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int backtrace_stack(void *data, char *name)
|
|
|
|
{
|
2009-06-15 15:57:59 +08:00
|
|
|
return 0;
|
2009-03-31 01:07:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void backtrace_address(void *data, unsigned long addr, int reliable)
|
|
|
|
{
|
|
|
|
struct perf_callchain_entry *entry = data;
|
|
|
|
|
2010-06-30 01:34:05 +08:00
|
|
|
perf_callchain_store(entry, addr);
|
2009-03-31 01:07:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static const struct stacktrace_ops backtrace_ops = {
|
|
|
|
.stack = backtrace_stack,
|
|
|
|
.address = backtrace_address,
|
2009-12-17 12:40:34 +08:00
|
|
|
.walk_stack = print_context_stack_bp,
|
2009-03-31 01:07:15 +08:00
|
|
|
};
|
|
|
|
|
2010-07-01 05:03:51 +08:00
|
|
|
void
|
|
|
|
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
|
2009-03-31 01:07:15 +08:00
|
|
|
{
|
2010-07-01 22:20:36 +08:00
|
|
|
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
|
|
|
|
/* TODO: We don't support guest os callchain now */
|
2010-08-20 20:30:41 +08:00
|
|
|
return;
|
2010-07-01 22:20:36 +08:00
|
|
|
}
|
|
|
|
|
2010-06-30 01:34:05 +08:00
|
|
|
perf_callchain_store(entry, regs->ip);
|
2009-03-31 01:07:15 +08:00
|
|
|
|
2011-03-18 10:40:06 +08:00
|
|
|
dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
|
2009-03-31 01:07:15 +08:00
|
|
|
}
|
|
|
|
|
2010-03-17 18:07:16 +08:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
static inline int
|
|
|
|
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
|
2009-06-15 19:07:24 +08:00
|
|
|
{
|
2010-03-17 18:07:16 +08:00
|
|
|
/* 32-bit process in 64-bit kernel. */
|
|
|
|
struct stack_frame_ia32 frame;
|
|
|
|
const void __user *fp;
|
2009-06-15 19:07:24 +08:00
|
|
|
|
2010-03-17 18:07:16 +08:00
|
|
|
if (!test_thread_flag(TIF_IA32))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
fp = compat_ptr(regs->bp);
|
|
|
|
while (entry->nr < PERF_MAX_STACK_DEPTH) {
|
|
|
|
unsigned long bytes;
|
|
|
|
frame.next_frame = 0;
|
|
|
|
frame.return_address = 0;
|
|
|
|
|
|
|
|
bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
|
|
|
|
if (bytes != sizeof(frame))
|
|
|
|
break;
|
2009-06-15 19:07:24 +08:00
|
|
|
|
2010-03-17 18:07:16 +08:00
|
|
|
if (fp < compat_ptr(regs->sp))
|
|
|
|
break;
|
2009-06-15 19:07:24 +08:00
|
|
|
|
2010-06-30 01:34:05 +08:00
|
|
|
perf_callchain_store(entry, frame.return_address);
|
2010-03-17 18:07:16 +08:00
|
|
|
fp = compat_ptr(frame.next_frame);
|
|
|
|
}
|
|
|
|
return 1;
|
2009-03-31 01:07:15 +08:00
|
|
|
}
|
2010-03-17 18:07:16 +08:00
|
|
|
#else
|
|
|
|
static inline int
|
|
|
|
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
2009-03-31 01:07:15 +08:00
|
|
|
|
2010-07-01 05:03:51 +08:00
|
|
|
void
|
|
|
|
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
|
2009-03-31 01:07:15 +08:00
|
|
|
{
|
|
|
|
struct stack_frame frame;
|
|
|
|
const void __user *fp;
|
|
|
|
|
2010-07-01 22:20:36 +08:00
|
|
|
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
|
|
|
|
/* TODO: We don't support guest os callchain now */
|
2010-08-20 20:30:41 +08:00
|
|
|
return;
|
2010-07-01 22:20:36 +08:00
|
|
|
}
|
2009-05-29 17:25:09 +08:00
|
|
|
|
2009-06-15 19:07:24 +08:00
|
|
|
fp = (void __user *)regs->bp;
|
2009-03-31 01:07:15 +08:00
|
|
|
|
2010-06-30 01:34:05 +08:00
|
|
|
perf_callchain_store(entry, regs->ip);
|
2009-03-31 01:07:15 +08:00
|
|
|
|
2010-03-17 18:07:16 +08:00
|
|
|
if (perf_callchain_user32(regs, entry))
|
|
|
|
return;
|
|
|
|
|
2009-06-19 04:20:52 +08:00
|
|
|
while (entry->nr < PERF_MAX_STACK_DEPTH) {
|
2010-03-17 18:07:16 +08:00
|
|
|
unsigned long bytes;
|
2009-06-15 15:57:59 +08:00
|
|
|
frame.next_frame = NULL;
|
2009-03-31 01:07:15 +08:00
|
|
|
frame.return_address = 0;
|
|
|
|
|
2010-03-17 18:07:16 +08:00
|
|
|
bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
|
|
|
|
if (bytes != sizeof(frame))
|
2009-03-31 01:07:15 +08:00
|
|
|
break;
|
|
|
|
|
2009-05-29 17:25:09 +08:00
|
|
|
if ((unsigned long)fp < regs->sp)
|
2009-03-31 01:07:15 +08:00
|
|
|
break;
|
|
|
|
|
2010-06-30 01:34:05 +08:00
|
|
|
perf_callchain_store(entry, frame.return_address);
|
2009-06-15 15:57:59 +08:00
|
|
|
fp = frame.next_frame;
|
2009-03-31 01:07:15 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-04-19 13:32:41 +08:00
|
|
|
unsigned long perf_instruction_pointer(struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
unsigned long ip;
|
2010-04-20 10:13:58 +08:00
|
|
|
|
2010-04-19 13:32:41 +08:00
|
|
|
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
|
|
|
|
ip = perf_guest_cbs->get_guest_ip();
|
|
|
|
else
|
|
|
|
ip = instruction_pointer(regs);
|
2010-04-20 10:13:58 +08:00
|
|
|
|
2010-04-19 13:32:41 +08:00
|
|
|
return ip;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned long perf_misc_flags(struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
int misc = 0;
|
2010-04-20 10:13:58 +08:00
|
|
|
|
2010-04-19 13:32:41 +08:00
|
|
|
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
|
2010-04-20 10:13:58 +08:00
|
|
|
if (perf_guest_cbs->is_user_mode())
|
|
|
|
misc |= PERF_RECORD_MISC_GUEST_USER;
|
|
|
|
else
|
|
|
|
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
|
|
|
|
} else {
|
|
|
|
if (user_mode(regs))
|
|
|
|
misc |= PERF_RECORD_MISC_USER;
|
|
|
|
else
|
|
|
|
misc |= PERF_RECORD_MISC_KERNEL;
|
|
|
|
}
|
|
|
|
|
2010-04-19 13:32:41 +08:00
|
|
|
if (regs->flags & PERF_EFLAGS_EXACT)
|
2010-04-09 05:03:20 +08:00
|
|
|
misc |= PERF_RECORD_MISC_EXACT_IP;
|
2010-04-19 13:32:41 +08:00
|
|
|
|
|
|
|
return misc;
|
|
|
|
}
|