perf stat: Use affinity for reading
Restructure event reading to use affinity to minimize the number of
IPIs needed.

Before, on a large test case with 94 CPUs:

  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
    3.16    0.106079           4     22082           read

After:

    3.43    0.081295           3     22082           read

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lore.kernel.org/lkml/20191121001522.180827-11-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 4804e01116
commit 4b49ab708d
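The change relies on a simple idea: before reading a CPU's counters, migrate the reading thread to that CPU, so the read(2) on each event file descriptor is serviced locally rather than via an inter-processor interrupt. Below is a minimal, self-contained sketch of that pattern; the read_all_cpus() driver is hypothetical and uses raw sched_getaffinity()/sched_setaffinity(), whereas the patch goes through perf's affinity__setup()/affinity__set()/affinity__cleanup() helpers, which also handle very large CPU masks.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

/*
 * Sketch only: pin the current thread to each CPU in turn so that
 * the counter reads that follow are local to that CPU (no IPI),
 * then restore the original affinity mask.
 */
static int read_all_cpus(int ncpus)
{
	cpu_set_t saved, mask;
	int cpu;

	/* Remember the mask we started with. */
	if (sched_getaffinity(0, sizeof(saved), &saved))
		return -1;

	for (cpu = 0; cpu < ncpus; cpu++) {
		CPU_ZERO(&mask);
		CPU_SET(cpu, &mask);
		/* Migrate to 'cpu'; skip CPUs we may not run on. */
		if (sched_setaffinity(0, sizeof(mask), &mask))
			continue;

		/* ... read(2) every event fd bound to 'cpu' here ... */
		printf("reading counters on CPU %d\n", cpu);
	}

	/* Move back to the original CPU set. */
	return sched_setaffinity(0, sizeof(saved), &saved);
}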
tools/perf/builtin-stat.c

@@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
  */
-static int read_counter(struct evsel *counter, struct timespec *rs)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
 {
 	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
-	int ncpus, cpu, thread;
-
-	if (target__has_cpu(&target) && !target__has_per_thread(&target))
-		ncpus = perf_evsel__nr_cpus(counter);
-	else
-		ncpus = 1;
+	int thread;
 
 	if (!counter->supported)
 		return -ENOENT;
@@ -283,39 +278,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 		nthreads = 1;
 
 	for (thread = 0; thread < nthreads; thread++) {
-		for (cpu = 0; cpu < ncpus; cpu++) {
-			struct perf_counts_values *count;
+		struct perf_counts_values *count;
 
-			count = perf_counts(counter->counts, cpu, thread);
+		count = perf_counts(counter->counts, cpu, thread);
 
-			/*
-			 * The leader's group read loads data into its group members
-			 * (via perf_evsel__read_counter) and sets threir count->loaded.
-			 */
-			if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
-			    read_single_counter(counter, cpu, thread, rs)) {
-				counter->counts->scaled = -1;
-				perf_counts(counter->counts, cpu, thread)->ena = 0;
-				perf_counts(counter->counts, cpu, thread)->run = 0;
-				return -1;
-			}
+		/*
+		 * The leader's group read loads data into its group members
+		 * (via perf_evsel__read_counter()) and sets their count->loaded.
+		 */
+		if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+		    read_single_counter(counter, cpu, thread, rs)) {
+			counter->counts->scaled = -1;
+			perf_counts(counter->counts, cpu, thread)->ena = 0;
+			perf_counts(counter->counts, cpu, thread)->run = 0;
+			return -1;
+		}
 
-			perf_counts__set_loaded(counter->counts, cpu, thread, false);
+		perf_counts__set_loaded(counter->counts, cpu, thread, false);
 
-			if (STAT_RECORD) {
-				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
-					pr_err("failed to write stat event\n");
-					return -1;
-				}
+		if (STAT_RECORD) {
+			if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+				pr_err("failed to write stat event\n");
+				return -1;
 			}
+		}
 
-			if (verbose > 1) {
-				fprintf(stat_config.output,
-					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-						perf_evsel__name(counter),
-						cpu,
-						count->val, count->ena, count->run);
-			}
+		if (verbose > 1) {
+			fprintf(stat_config.output,
+				"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+					perf_evsel__name(counter),
+					cpu,
+					count->val, count->ena, count->run);
 		}
 	}
 
@@ -325,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 static void read_counters(struct timespec *rs)
 {
 	struct evsel *counter;
-	int ret;
+	struct affinity affinity;
+	int i, ncpus, cpu;
+
+	if (affinity__setup(&affinity) < 0)
+		return;
+
+	ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+	if (!target__has_cpu(&target) || target__has_per_thread(&target))
+		ncpus = 1;
+	evlist__for_each_cpu(evsel_list, i, cpu) {
+		if (i >= ncpus)
+			break;
+		affinity__set(&affinity, cpu);
+
+		evlist__for_each_entry(evsel_list, counter) {
+			if (evsel__cpu_iter_skip(counter, cpu))
+				continue;
+			if (!counter->err) {
+				counter->err = read_counter_cpu(counter, rs,
+							       counter->cpu_iter - 1);
+			}
+		}
+	}
+	affinity__cleanup(&affinity);
 
 	evlist__for_each_entry(evsel_list, counter) {
-		ret = read_counter(counter, rs);
-		if (ret)
+		if (counter->err)
 			pr_debug("failed to read counter %s\n", counter->name);
-
-		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+		if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
 			pr_warning("failed to process counter %s\n", counter->name);
+		counter->err = 0;
 	}
 }
 
tools/perf/util/evsel.h

@@ -86,6 +86,7 @@ struct evsel {
 	struct list_head	config_terms;
 	struct bpf_object	*bpf_obj;
 	int			bpf_fd;
+	int			err;
 	bool			auto_merge_stats;
 	bool			merged_stat;
 	const char		*metric_expr;
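One consequence of the new loop order is visible in the last two hunks: with CPUs in the outer loop, a read failure can no longer be returned straight from a per-event read_counter() call, so it is parked in the new evsel->err field and reported once per event after the affinity walk. A rough sketch of that deferred-error pattern, using a hypothetical struct event and read_one() in place of perf's evsel machinery:

#include <stdio.h>

struct event {
	const char *name;
	int err;	/* first error seen while reading, 0 if none */
};

/* Stand-in for reading one event on one CPU; returns 0 or -1. */
static int read_one(struct event *ev, int cpu)
{
	(void)ev; (void)cpu;
	return 0;
}

static void read_all(struct event *events, int nevents, int ncpus)
{
	/* CPU-outer walk: record errors instead of bailing out early. */
	for (int cpu = 0; cpu < ncpus; cpu++)
		for (int i = 0; i < nevents; i++)
			if (!events[i].err)
				events[i].err = read_one(&events[i], cpu);

	/* Report once per event, then reset for the next interval. */
	for (int i = 0; i < nevents; i++) {
		if (events[i].err)
			fprintf(stderr, "failed to read counter %s\n",
				events[i].name);
		events[i].err = 0;
	}
}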