2016-09-22 23:36:38 +08:00
|
|
|
#include <linux/compiler.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include "util.h"
|
|
|
|
#include "debug.h"
|
|
|
|
#include "builtin.h"
|
|
|
|
#include <subcmd/parse-options.h>
|
perf c2c: Add record subcommand
Adding c2c record subcommand. It setups options related to HITM
cacheline analysis and calls standard perf record command.
$ sudo perf c2c record -v -- -a
calling: record -W -d --sample-cpu -e cpu/mem-loads,ldlat=30/P -e cpu/mem-stores/P -a
...
It produces perf.data, which is to be reported by perf c2c report, that
comes in following patches.
Details are described in the man page, which is added in one of the
following patches.
Committer notes:
Testing it:
# perf c2c record -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 5.050 MB perf.data (412 samples) ]
# ls -la perf.data
-rw-------. 1 root root 5301752 Oct 4 13:32 perf.data
# perf evlist
cpu/mem-loads,ldlat=30/P
cpu/mem-stores/P
# perf evlist -v
cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f
cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
#
# perf report --stdio
<SNIP>
# Total Lost Samples: 14
# Samples: 216 of event 'cpu/mem-loads,ldlat=30/P'
# Event count (approx.): 15207
# Overhead Symbol Shared Object
# ........ ..................................... ............................
10.32% [k] update_blocked_averages [kernel.vmlinux]
3.43% [.] 0x00000000001a2122 qemu-system-x86_64 (deleted)
2.52% [k] enqueue_entity [kernel.vmlinux]
1.88% [.] g_main_context_query libglib-2.0.so.0.4800.2
1.86% [k] __schedule [kernel.vmlinux]
<SNIP>
# Samples: 196 of event 'cpu/mem-stores/P'
# Event count (approx.): 14771346
# Overhead Symbol Shared Object
# ........ ................................... ............................
13.91% [k] intel_idle [kernel.vmlinux]
3.02% [.] 0x00000000022f06ea chrome
2.94% [.] 0x00000000001a1b4c qemu-system-x86_64 (deleted)
2.94% [.] 0x000000000019d8e4 qemu-system-x86_64 (deleted)
2.38% [.] 0x00000000001a1c52 qemu-system-x86_64 (deleted)
<SNIP>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-12-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-09-22 23:36:39 +08:00
|
|
|
#include "mem-events.h"
|
2016-09-22 23:36:38 +08:00
|
|
|
|
|
|
|
static const char * const c2c_usage[] = {
|
|
|
|
"perf c2c",
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
perf c2c: Add record subcommand
Adding c2c record subcommand. It setups options related to HITM
cacheline analysis and calls standard perf record command.
$ sudo perf c2c record -v -- -a
calling: record -W -d --sample-cpu -e cpu/mem-loads,ldlat=30/P -e cpu/mem-stores/P -a
...
It produces perf.data, which is to be reported by perf c2c report, that
comes in following patches.
Details are described in the man page, which is added in one of the
following patches.
Committer notes:
Testing it:
# perf c2c record -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 5.050 MB perf.data (412 samples) ]
# ls -la perf.data
-rw-------. 1 root root 5301752 Oct 4 13:32 perf.data
# perf evlist
cpu/mem-loads,ldlat=30/P
cpu/mem-stores/P
# perf evlist -v
cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f
cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
#
# perf report --stdio
<SNIP>
# Total Lost Samples: 14
# Samples: 216 of event 'cpu/mem-loads,ldlat=30/P'
# Event count (approx.): 15207
# Overhead Symbol Shared Object
# ........ ..................................... ............................
10.32% [k] update_blocked_averages [kernel.vmlinux]
3.43% [.] 0x00000000001a2122 qemu-system-x86_64 (deleted)
2.52% [k] enqueue_entity [kernel.vmlinux]
1.88% [.] g_main_context_query libglib-2.0.so.0.4800.2
1.86% [k] __schedule [kernel.vmlinux]
<SNIP>
# Samples: 196 of event 'cpu/mem-stores/P'
# Event count (approx.): 14771346
# Overhead Symbol Shared Object
# ........ ................................... ............................
13.91% [k] intel_idle [kernel.vmlinux]
3.02% [.] 0x00000000022f06ea chrome
2.94% [.] 0x00000000001a1b4c qemu-system-x86_64 (deleted)
2.94% [.] 0x000000000019d8e4 qemu-system-x86_64 (deleted)
2.38% [.] 0x00000000001a1c52 qemu-system-x86_64 (deleted)
<SNIP>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-12-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-09-22 23:36:39 +08:00
|
|
|
static int parse_record_events(const struct option *opt __maybe_unused,
|
|
|
|
const char *str, int unset __maybe_unused)
|
|
|
|
{
|
|
|
|
bool *event_set = (bool *) opt->value;
|
|
|
|
|
|
|
|
*event_set = true;
|
|
|
|
return perf_mem_events__parse(str);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static const char * const __usage_record[] = {
|
|
|
|
"perf c2c record [<options>] [<command>]",
|
|
|
|
"perf c2c record [<options>] -- <command> [<options>]",
|
|
|
|
NULL
|
|
|
|
};
|
|
|
|
|
|
|
|
static const char * const *record_mem_usage = __usage_record;
|
|
|
|
|
|
|
|
static int perf_c2c__record(int argc, const char **argv)
|
|
|
|
{
|
|
|
|
int rec_argc, i = 0, j;
|
|
|
|
const char **rec_argv;
|
|
|
|
int ret;
|
|
|
|
bool all_user = false, all_kernel = false;
|
|
|
|
bool event_set = false;
|
|
|
|
struct option options[] = {
|
|
|
|
OPT_CALLBACK('e', "event", &event_set, "event",
|
|
|
|
"event selector. Use 'perf mem record -e list' to list available events",
|
|
|
|
parse_record_events),
|
|
|
|
OPT_INCR('v', "verbose", &verbose,
|
|
|
|
"be more verbose (show counter open errors, etc)"),
|
|
|
|
OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
|
|
|
|
OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
|
|
|
|
OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"),
|
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
|
|
|
if (perf_mem_events__init()) {
|
|
|
|
pr_err("failed: memory events not supported\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
argc = parse_options(argc, argv, options, record_mem_usage,
|
|
|
|
PARSE_OPT_KEEP_UNKNOWN);
|
|
|
|
|
|
|
|
rec_argc = argc + 10; /* max number of arguments */
|
|
|
|
rec_argv = calloc(rec_argc + 1, sizeof(char *));
|
|
|
|
if (!rec_argv)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
rec_argv[i++] = "record";
|
|
|
|
|
|
|
|
if (!event_set) {
|
|
|
|
perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
|
|
|
|
perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
|
|
|
|
rec_argv[i++] = "-W";
|
|
|
|
|
|
|
|
rec_argv[i++] = "-d";
|
|
|
|
rec_argv[i++] = "--sample-cpu";
|
|
|
|
|
|
|
|
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
|
|
|
|
if (!perf_mem_events[j].record)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!perf_mem_events[j].supported) {
|
|
|
|
pr_err("failed: event '%s' not supported\n",
|
|
|
|
perf_mem_events[j].name);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
rec_argv[i++] = "-e";
|
|
|
|
rec_argv[i++] = perf_mem_events__name(j);
|
|
|
|
};
|
|
|
|
|
|
|
|
if (all_user)
|
|
|
|
rec_argv[i++] = "--all-user";
|
|
|
|
|
|
|
|
if (all_kernel)
|
|
|
|
rec_argv[i++] = "--all-kernel";
|
|
|
|
|
|
|
|
for (j = 0; j < argc; j++, i++)
|
|
|
|
rec_argv[i] = argv[j];
|
|
|
|
|
|
|
|
if (verbose > 0) {
|
|
|
|
pr_debug("calling: ");
|
|
|
|
|
|
|
|
j = 0;
|
|
|
|
|
|
|
|
while (rec_argv[j]) {
|
|
|
|
pr_debug("%s ", rec_argv[j]);
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
pr_debug("\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = cmd_record(i, rec_argv, NULL);
|
|
|
|
free(rec_argv);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-09-22 23:36:38 +08:00
|
|
|
int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused)
|
|
|
|
{
|
|
|
|
const struct option c2c_options[] = {
|
|
|
|
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
|
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
|
|
|
argc = parse_options(argc, argv, c2c_options, c2c_usage,
|
|
|
|
PARSE_OPT_STOP_AT_NON_OPTION);
|
perf c2c: Add record subcommand
Adding c2c record subcommand. It setups options related to HITM
cacheline analysis and calls standard perf record command.
$ sudo perf c2c record -v -- -a
calling: record -W -d --sample-cpu -e cpu/mem-loads,ldlat=30/P -e cpu/mem-stores/P -a
...
It produces perf.data, which is to be reported by perf c2c report, that
comes in following patches.
Details are described in the man page, which is added in one of the
following patches.
Committer notes:
Testing it:
# perf c2c record -a sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 5.050 MB perf.data (412 samples) ]
# ls -la perf.data
-rw-------. 1 root root 5301752 Oct 4 13:32 perf.data
# perf evlist
cpu/mem-loads,ldlat=30/P
cpu/mem-stores/P
# perf evlist -v
cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f
cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1
#
# perf report --stdio
<SNIP>
# Total Lost Samples: 14
# Samples: 216 of event 'cpu/mem-loads,ldlat=30/P'
# Event count (approx.): 15207
# Overhead Symbol Shared Object
# ........ ..................................... ............................
10.32% [k] update_blocked_averages [kernel.vmlinux]
3.43% [.] 0x00000000001a2122 qemu-system-x86_64 (deleted)
2.52% [k] enqueue_entity [kernel.vmlinux]
1.88% [.] g_main_context_query libglib-2.0.so.0.4800.2
1.86% [k] __schedule [kernel.vmlinux]
<SNIP>
# Samples: 196 of event 'cpu/mem-stores/P'
# Event count (approx.): 14771346
# Overhead Symbol Shared Object
# ........ ................................... ............................
13.91% [k] intel_idle [kernel.vmlinux]
3.02% [.] 0x00000000022f06ea chrome
2.94% [.] 0x00000000001a1b4c qemu-system-x86_64 (deleted)
2.94% [.] 0x000000000019d8e4 qemu-system-x86_64 (deleted)
2.38% [.] 0x00000000001a1c52 qemu-system-x86_64 (deleted)
<SNIP>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1474558645-19956-12-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-09-22 23:36:39 +08:00
|
|
|
|
|
|
|
if (!argc)
|
|
|
|
usage_with_options(c2c_usage, c2c_options);
|
|
|
|
|
|
|
|
if (!strncmp(argv[0], "rec", 3)) {
|
|
|
|
return perf_c2c__record(argc, argv);
|
|
|
|
} else {
|
|
|
|
usage_with_options(c2c_usage, c2c_options);
|
|
|
|
}
|
|
|
|
|
2016-09-22 23:36:38 +08:00
|
|
|
return 0;
|
|
|
|
}
|