mirror of https://gitee.com/openkylin/linux.git
libperf: Add support for user space counter access
x86 and arm64 can both support direct access of event counters in userspace. The access sequence is less than trivial and currently exists in perf test code (tools/perf/arch/x86/tests/rdpmc.c) with copies in projects such as PAPI and libpfm4. In order to support userspace access, an event must be mmapped first with perf_evsel__mmap(). Then subsequent calls to perf_evsel__read() will use the fast path (assuming the arch supports it). Committer notes: Added a '__maybe_unused' attribute to the read_perf_counter() argument to fix the build on arches other than x86_64 and arm. Committer testing: Building and running the libperf tests in verbose mode (V=1) now shows those "loop = N, count = N" extra lines, testing user space counter access. # make V=1 -C tools/lib/perf tests make: Entering directory '/home/acme/git/perf/tools/lib/perf' make -f /home/acme/git/perf/tools/build/Makefile.build dir=. obj=libperf make -C /home/acme/git/perf/tools/lib/api/ O= libapi.a make -f /home/acme/git/perf/tools/build/Makefile.build dir=./fd obj=libapi make -f /home/acme/git/perf/tools/build/Makefile.build dir=./fs obj=libapi make -C tests gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-cpumap-a test-cpumap.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-threadmap-a test-threadmap.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-evlist-a test-evlist.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-evsel-a test-evsel.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc 
-I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-cpumap-so test-cpumap.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-threadmap-so test-threadmap.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-evlist-so test-evlist.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-evsel-so test-evsel.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf make -C tests run running static: - running test-cpumap.c...OK - running test-threadmap.c...OK - running test-evlist.c...OK - running test-evsel.c... loop = 65536, count = 333926 loop = 131072, count = 655781 loop = 262144, count = 1311141 loop = 524288, count = 2630126 loop = 1048576, count = 5256955 loop = 65536, count = 524594 loop = 131072, count = 1058916 loop = 262144, count = 2097458 loop = 524288, count = 4205429 loop = 1048576, count = 8406606 OK running dynamic: - running test-cpumap.c...OK - running test-threadmap.c...OK - running test-evlist.c...OK - running test-evsel.c... 
loop = 65536, count = 328102 loop = 131072, count = 655782 loop = 262144, count = 1317494 loop = 524288, count = 2627851 loop = 1048576, count = 5255187 loop = 65536, count = 524601 loop = 131072, count = 1048923 loop = 262144, count = 2107917 loop = 524288, count = 4194606 loop = 1048576, count = 8409322 OK make: Leaving directory '/home/acme/git/perf/tools/lib/perf' # Signed-off-by: Rob Herring <robh@kernel.org> Acked-by: Jiri Olsa <jolsa@redhat.com> Acked-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Itaru Kitayama <itaru.kitayama@gmail.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Link: http://lore.kernel.org/lkml/20210414155412.3697605-4-robh@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
d3003d9e68
commit
47d01e7b99
|
@ -267,6 +267,10 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
|
|||
if (FD(evsel, cpu, thread) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (MMAP(evsel, cpu, thread) &&
|
||||
!perf_mmap__read_self(MMAP(evsel, cpu, thread), count))
|
||||
return 0;
|
||||
|
||||
if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
|
||||
return -errno;
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
|
||||
|
||||
struct perf_mmap;
|
||||
struct perf_counts_values;
|
||||
|
||||
typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
|
||||
|
||||
|
@ -52,4 +53,6 @@ void perf_mmap__put(struct perf_mmap *map);
|
|||
|
||||
u64 perf_mmap__read_head(struct perf_mmap *map);
|
||||
|
||||
int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count);
|
||||
|
||||
#endif /* __LIBPERF_INTERNAL_MMAP_H */
|
||||
|
|
|
@ -8,9 +8,11 @@
|
|||
#include <linux/perf_event.h>
|
||||
#include <perf/mmap.h>
|
||||
#include <perf/event.h>
|
||||
#include <perf/evsel.h>
|
||||
#include <internal/mmap.h>
|
||||
#include <internal/lib.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/math64.h>
|
||||
#include "internal.h"
|
||||
|
||||
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
|
||||
|
@ -273,3 +275,89 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map)
|
|||
|
||||
return event;
|
||||
}
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
static u64 read_perf_counter(unsigned int counter)
|
||||
{
|
||||
unsigned int low, high;
|
||||
|
||||
asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
|
||||
|
||||
return low | ((u64)high) << 32;
|
||||
}
|
||||
|
||||
static u64 read_timestamp(void)
|
||||
{
|
||||
unsigned int low, high;
|
||||
|
||||
asm volatile("rdtsc" : "=a" (low), "=d" (high));
|
||||
|
||||
return low | ((u64)high) << 32;
|
||||
}
|
||||
#else
|
||||
/*
 * Fallback used when the build target is neither i386 nor x86_64:
 * no counter is read, the argument is ignored and 0 is returned.
 */
static u64 read_perf_counter(unsigned int counter __maybe_unused)
{
	return 0;
}
|
||||
/*
 * Fallback used when the build target is neither i386 nor x86_64:
 * no timestamp source is read and 0 is returned.
 */
static u64 read_timestamp(void)
{
	return 0;
}
|
||||
#endif
|
||||
|
||||
/*
 * Read an event's count directly from its mmapped control page, without
 * issuing a read on the event file descriptor.
 *
 * @map:   mapping whose ->base is used as the struct perf_event_mmap_page.
 * @count: output; ->ena, ->run and ->val are written.
 *
 * Returns 0 on success. Returns -1 when there is no mapped page, when the
 * page does not advertise cap_user_rdpmc, or when the page's index field
 * is 0; in the last case ->ena and ->run have already been overwritten.
 *
 * The page fields are sampled inside a retry loop: pc->lock is read before
 * and after, and the whole sample is redone if the two values differ.
 * Statement order inside the loop therefore matters.
 *
 * NOTE(review): ->val is scaled by ena/run here whenever the two times
 * differ; confirm callers expect a scaled value from this path.
 */
int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
|
||||
{
|
||||
struct perf_event_mmap_page *pc = map->base;
|
||||
u32 seq, idx, time_mult = 0, time_shift = 0;
|
||||
/* time_cycles = 0 and time_mask = ~0 make the "short time" fixup below a no-op by default. */
u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
|
||||
|
||||
if (!pc || !pc->cap_user_rdpmc)
|
||||
return -1;
|
||||
|
||||
do {
|
||||
/* Remember the sequence value; the loop condition re-reads it to detect a concurrent update. */
seq = READ_ONCE(pc->lock);
|
||||
/* Compiler barrier: keep the field reads below from being moved before the pc->lock read. */
barrier();
|
||||
|
||||
count->ena = READ_ONCE(pc->time_enabled);
|
||||
count->run = READ_ONCE(pc->time_running);
|
||||
|
||||
/* Time conversion parameters are only needed when enabled and running times differ. */
if (pc->cap_user_time && count->ena != count->run) {
|
||||
cyc = read_timestamp();
|
||||
time_mult = READ_ONCE(pc->time_mult);
|
||||
time_shift = READ_ONCE(pc->time_shift);
|
||||
time_offset = READ_ONCE(pc->time_offset);
|
||||
|
||||
if (pc->cap_user_time_short) {
|
||||
time_cycles = READ_ONCE(pc->time_cycles);
|
||||
time_mask = READ_ONCE(pc->time_mask);
|
||||
}
|
||||
}
|
||||
|
||||
idx = READ_ONCE(pc->index);
|
||||
cnt = READ_ONCE(pc->offset);
|
||||
/* A zero index means there is no counter to read from user space; give up with -1. */
if (pc->cap_user_rdpmc && idx) {
|
||||
/* The page stores the counter number plus one, hence idx - 1. */
s64 evcnt = read_perf_counter(idx - 1);
|
||||
u16 width = READ_ONCE(pc->pmc_width);
|
||||
|
||||
/*
 * Keep only the low 'width' bits of the raw read and sign-extend
 * them to 64 bits (relies on an arithmetic right shift of s64).
 */
evcnt <<= 64 - width;
|
||||
evcnt >>= 64 - width;
|
||||
cnt += evcnt;
|
||||
} else
|
||||
return -1;
|
||||
|
||||
/* Compiler barrier: keep the field reads above from being moved after the re-check of pc->lock. */
barrier();
|
||||
} while (READ_ONCE(pc->lock) != seq);
|
||||
|
||||
/* Times differ: extend both by the time elapsed since the page was updated, then scale the count. */
if (count->ena != count->run) {
|
||||
u64 delta;
|
||||
|
||||
/* Adjust for cap_usr_time_short, a nop if not */
|
||||
cyc = time_cycles + ((cyc - time_cycles) & time_mask);
|
||||
|
||||
/* presumably (cyc * time_mult) >> time_shift, i.e. cycles converted to the page's time unit -- TODO confirm helper */
delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
|
||||
|
||||
count->ena += delta;
|
||||
/* idx is always non-zero here: the loop above returns -1 otherwise. */
if (idx)
|
||||
count->run += delta;
|
||||
|
||||
/* presumably cnt * ena / run -- TODO confirm helper semantics and behavior when run is 0 */
cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
|
||||
}
|
||||
|
||||
count->val = cnt;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -120,6 +120,70 @@ static int test_stat_thread_enable(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int test_stat_user_read(int event)
|
||||
{
|
||||
struct perf_counts_values counts = { .val = 0 };
|
||||
struct perf_thread_map *threads;
|
||||
struct perf_evsel *evsel;
|
||||
struct perf_event_mmap_page *pc;
|
||||
struct perf_event_attr attr = {
|
||||
.type = PERF_TYPE_HARDWARE,
|
||||
.config = event,
|
||||
};
|
||||
int err, i;
|
||||
|
||||
threads = perf_thread_map__new_dummy();
|
||||
__T("failed to create threads", threads);
|
||||
|
||||
perf_thread_map__set_pid(threads, 0, 0);
|
||||
|
||||
evsel = perf_evsel__new(&attr);
|
||||
__T("failed to create evsel", evsel);
|
||||
|
||||
err = perf_evsel__open(evsel, NULL, threads);
|
||||
__T("failed to open evsel", err == 0);
|
||||
|
||||
err = perf_evsel__mmap(evsel, 0);
|
||||
__T("failed to mmap evsel", err == 0);
|
||||
|
||||
pc = perf_evsel__mmap_base(evsel, 0, 0);
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
__T("userspace counter access not supported", pc->cap_user_rdpmc);
|
||||
__T("userspace counter access not enabled", pc->index);
|
||||
__T("userspace counter width not set", pc->pmc_width >= 32);
|
||||
#endif
|
||||
|
||||
perf_evsel__read(evsel, 0, 0, &counts);
|
||||
__T("failed to read value for evsel", counts.val != 0);
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
volatile int count = 0x10000 << i;
|
||||
__u64 start, end, last = 0;
|
||||
|
||||
__T_VERBOSE("\tloop = %u, ", count);
|
||||
|
||||
perf_evsel__read(evsel, 0, 0, &counts);
|
||||
start = counts.val;
|
||||
|
||||
while (count--) ;
|
||||
|
||||
perf_evsel__read(evsel, 0, 0, &counts);
|
||||
end = counts.val;
|
||||
|
||||
__T("invalid counter data", (end - start) > last);
|
||||
last = end - start;
|
||||
__T_VERBOSE("count = %llu\n", end - start);
|
||||
}
|
||||
|
||||
perf_evsel__munmap(evsel);
|
||||
perf_evsel__close(evsel);
|
||||
perf_evsel__delete(evsel);
|
||||
|
||||
perf_thread_map__put(threads);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
__T_START;
|
||||
|
@ -129,6 +193,8 @@ int main(int argc, char **argv)
|
|||
test_stat_cpu();
|
||||
test_stat_thread();
|
||||
test_stat_thread_enable();
|
||||
test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS);
|
||||
test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES);
|
||||
|
||||
__T_END;
|
||||
return tests_failed == 0 ? 0 : -1;
|
||||
|
|
Loading…
Reference in New Issue