perf/urgent fixes:

perf.data:
 
   Alexey Budankov:
 
   - Fix loading of compressed data split across adjacent records
 
   Jiri Olsa:
 
   - Fix buffer size setting for processing CPU topology perf.data header.
 
 perf stat:
 
   Jiri Olsa:
 
   - Fix segfault for event group in repeat mode
 
   Cong Wang:
 
   - Always separate "stalled cycles per insn" line, it was being appended to
     the "instructions" line.
 
 perf script:
 
   Andi Kleen:
 
   - Fix --max-blocks man page description.
 
   - Improve man page description of metrics.
 
   - Fix off by one in brstackinsn IPC computation.
 
 perf probe:
 
   Arnaldo Carvalho de Melo:
 
   - Avoid calling freeing routine multiple times for same pointer.
 
 perf build:
 
   - Do not use -Wshadow on gcc < 4.8, avoiding too strict warnings
     treated as errors, breaking the build.
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCXTcpSQAKCRCyPKLppCJ+
 J0s1AQCY4uEiw7ZDUMPkztqG/9nder8M4ncd2FYwsQObmjxhBQEA+u/jvJ9UcUKk
 X9BpjDE+1Pi3LrMaFjDQMKgpSutzXgg=
 =rAom
 -----END PGP SIGNATURE-----

Merge tag 'perf-urgent-for-mingo-5.3-20190723' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/urgent fixes from Arnaldo Carvalho de Melo:

perf.data:

  Alexey Budankov:

  - Fix loading of compressed data split across adjacent records

  Jiri Olsa:

  - Fix buffer size setting for processing CPU topology perf.data header.

perf stat:

  Jiri Olsa:

  - Fix segfault for event group in repeat mode

  Cong Wang:

  - Always separate "stalled cycles per insn" line, it was being appended to
    the "instructions" line.

perf script:

  Andi Kleen:

  - Fix --max-blocks man page description.

  - Improve man page description of metrics.

  - Fix off by one in brstackinsn IPC computation.

perf probe:

  Arnaldo Carvalho de Melo:

  - Avoid calling freeing routine multiple times for same pointer.

perf build:

  - Do not use -Wshadow on gcc < 4.8, avoiding too strict warnings
    treated as errors, breaking the build.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2019-07-23 23:41:33 +02:00
commit 49902052fc
12 changed files with 54 additions and 19 deletions

View File

@ -228,11 +228,11 @@ OPTIONS
With the metric option perf script can compute metrics for
sampling periods, similar to perf stat. This requires
specifying a group with multiple metrics with the :S option
specifying a group with multiple events defining metrics with the :S option
for perf record. perf will sample on the first event, and
compute metrics for all the events in the group. Please note
print computed metrics for all the events in the group. Please note
that the metric computed is averaged over the whole sampling
period, not just for the sample point.
period (since the last sample), not just for the sample point.
For sample events it's possible to display misc field with -F +misc option,
following letters are displayed for each bit:
@ -384,7 +384,7 @@ include::itrace.txt[]
perf script --time 0%-10%,30%-40%
--max-blocks::
Set the maximum number of program blocks to print with brstackasm for
Set the maximum number of program blocks to print with brstackinsn for
each sample.
--reltime::

View File

@ -698,6 +698,16 @@ __cmd_probe(int argc, const char **argv)
ret = perf_add_probe_events(params.events, params.nevents);
if (ret < 0) {
/*
* When perf_add_probe_events() fails it calls
* cleanup_perf_probe_events(pevs, npevs), i.e.
* cleanup_perf_probe_events(params.events, params.nevents), which
* will call clear_perf_probe_event(), so set nevents to zero
* to avoid cleanup_params() to call clear_perf_probe_event() again
* on the same pevs.
*/
params.nevents = 0;
pr_err_with_code(" Error: Failed to add events.", ret);
return ret;
}

View File

@ -1059,7 +1059,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp,
&total_cycles);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, ip);

View File

@ -607,7 +607,13 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
* group leaders.
*/
read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
perf_evlist__close(evsel_list);
/*
* We need to keep evsel_list alive, because it's processed
* later the evsel_list will be closed after.
*/
if (!STAT_RECORD)
perf_evlist__close(evsel_list);
return WEXITSTATUS(status);
}
@ -1997,6 +2003,7 @@ int cmd_stat(int argc, const char **argv)
perf_session__write_header(perf_stat.session, evsel_list, fd, true);
}
perf_evlist__close(evsel_list);
perf_session__delete(perf_stat.session);
}

View File

@ -1291,6 +1291,7 @@ static void perf_evsel__free_id(struct perf_evsel *evsel)
xyarray__delete(evsel->sample_id);
evsel->sample_id = NULL;
zfree(&evsel->id);
evsel->ids = 0;
}
static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
@ -2077,6 +2078,7 @@ void perf_evsel__close(struct perf_evsel *evsel)
perf_evsel__close_fd(evsel);
perf_evsel__free_fd(evsel);
perf_evsel__free_id(evsel);
}
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,

View File

@ -3747,7 +3747,7 @@ int perf_event__process_feature(struct perf_session *session,
return 0;
ff.buf = (void *)fe->data;
ff.size = event->header.size - sizeof(event->header);
ff.size = event->header.size - sizeof(*fe);
ff.ph = &session->header;
if (feat_ops[feat].process(&ff, NULL))

View File

@ -2230,6 +2230,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev)
field = next;
}
}
pev->nargs = 0;
zfree(&pev->args);
}

View File

@ -36,10 +36,16 @@ static int perf_session__process_compressed_event(struct perf_session *session,
void *src;
size_t decomp_size, src_size;
u64 decomp_last_rem = 0;
size_t decomp_len = session->header.env.comp_mmap_len;
size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
struct decomp *decomp, *decomp_last = session->decomp_last;
decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE,
if (decomp_last) {
decomp_last_rem = decomp_last->size - decomp_last->head;
decomp_len += decomp_last_rem;
}
mmap_len = sizeof(struct decomp) + decomp_len;
decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
if (decomp == MAP_FAILED) {
pr_err("Couldn't allocate memory for decompression\n");
@ -47,10 +53,10 @@ static int perf_session__process_compressed_event(struct perf_session *session,
}
decomp->file_pos = file_offset;
decomp->mmap_len = mmap_len;
decomp->head = 0;
if (decomp_last) {
decomp_last_rem = decomp_last->size - decomp_last->head;
if (decomp_last_rem) {
memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
decomp->size = decomp_last_rem;
}
@ -61,7 +67,7 @@ static int perf_session__process_compressed_event(struct perf_session *session,
decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
if (!decomp_size) {
munmap(decomp, sizeof(struct decomp) + decomp_len);
munmap(decomp, mmap_len);
pr_err("Couldn't decompress data\n");
return -1;
}
@ -255,15 +261,15 @@ static void perf_session__delete_threads(struct perf_session *session)
static void perf_session__release_decomp_events(struct perf_session *session)
{
struct decomp *next, *decomp;
size_t decomp_len;
size_t mmap_len;
next = session->decomp;
decomp_len = session->header.env.comp_mmap_len;
do {
decomp = next;
if (decomp == NULL)
break;
next = decomp->next;
munmap(decomp, decomp_len + sizeof(struct decomp));
mmap_len = decomp->mmap_len;
munmap(decomp, mmap_len);
} while (1);
}

View File

@ -46,6 +46,7 @@ struct perf_session {
struct decomp {
struct decomp *next;
u64 file_pos;
size_t mmap_len;
u64 head;
size_t size;
char data[];

View File

@ -819,7 +819,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
"stalled cycles per insn",
ratio);
} else if (have_frontend_stalled) {
print_metric(config, ctxp, NULL, NULL,
out->new_line(config, ctxp);
print_metric(config, ctxp, NULL, "%7.2f ",
"stalled cycles per insn", 0);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {

View File

@ -99,8 +99,8 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size
while (input.pos < input.size) {
ret = ZSTD_decompressStream(data->dstream, &output, &input);
if (ZSTD_isError(ret)) {
pr_err("failed to decompress (B): %ld -> %ld : %s\n",
src_size, output.size, ZSTD_getErrorName(ret));
pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n",
src_size, output.size, dst_size, ZSTD_getErrorName(ret));
break;
}
output.dst = dst + output.pos;

View File

@ -32,7 +32,6 @@ EXTRA_WARNINGS += -Wno-system-headers
EXTRA_WARNINGS += -Wold-style-definition
EXTRA_WARNINGS += -Wpacked
EXTRA_WARNINGS += -Wredundant-decls
EXTRA_WARNINGS += -Wshadow
EXTRA_WARNINGS += -Wstrict-prototypes
EXTRA_WARNINGS += -Wswitch-default
EXTRA_WARNINGS += -Wswitch-enum
@ -69,8 +68,16 @@ endif
# will do for now and keep the above -Wstrict-aliasing=3 in place
# in newer systems.
# Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h
#
# See https://lkml.org/lkml/2006/11/28/253 and https://gcc.gnu.org/gcc-4.8/changes.html,
# that takes into account Linus's comments (search for Wshadow) for the reasoning about
# -Wshadow not being interesting before gcc 4.8.
ifneq ($(filter 3.%,$(MAKE_VERSION)),) # make-3
EXTRA_WARNINGS += -fno-strict-aliasing
EXTRA_WARNINGS += -Wno-shadow
else
EXTRA_WARNINGS += -Wshadow
endif
ifneq ($(findstring $(MAKEFLAGS), w),w)