perf data: Add JSON export

This adds a feature to export perf data to JSON.

The resolved symbols are exported into the JSON so that external tools
don't need to load the dsos themselves (or even have access to them at
all.) This makes it easy to load and analyze perf data with standalone
tools where direct perf or libbabeltrace integration is impractical.

The exporter uses a minimal inline JSON encoding without any external
dependencies. Currently it only outputs some headers and sample metadata
but it's easily extensible.

Use it like this:

  $ perf data convert --to-json out.json

Committer notes:

Fixup a __printf() bug that broke the build:

  util/data-convert-json.c:103:11: error: expected ‘)’ before numeric constant
    103 | __(printf, 5, 6)
        |           ^~
        |           )
  util/data-convert-json.c: In function ‘output_sample_callchain_entry’:
  util/data-convert-json.c:124:2: error: implicit declaration of function ‘output_json_key_format’; did you mean ‘output_json_format’? [-Werror=implicit-function-declaration]
    124 |  output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip);
        |  ^~~~~~~~~~~~~~~~~~~~~~
        |  output_json_format

Also had to add this patch to fix errors reported by various versions of
clang:

  -       if (al && al->sym && al->sym->name && strlen(al->sym->name) > 0) {
  +       if (al && al->sym && al->sym->namelen) {

al->sym->name is a zero sized array (to avoid one extra alloc in the
symbol__new() constructor), so sym->namelen, which carries its strlen, is
checked instead.

Committer testing:

  $ ls -la out.json
  ls: cannot access 'out.json': No such file or directory
  $ perf record sleep 0.1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ]
  $ perf report --stats | grep -w SAMPLE
            SAMPLE events:          8
  $ perf data convert --to-json out.json
  [ perf data convert: Converted 'perf.data' into JSON data 'out.json' ]
  [ perf data convert: Converted and wrote 0.002 MB (8 samples) ]
  $ ls -la out.json
  -rw-rw-r--. 1 acme acme 2017 Apr 26 17:29 out.json
  $ cat out.json
  {
  	"linux-perf-json-version": 1,
  	"headers": {
  		"header-version": 1,
  		"captured-on": "2021-04-26T20:28:57Z",
  		"data-offset": 432,
  		"data-size": 1016,
  		"feat-offset": 1448,
  		"hostname": "five",
  		"os-release": "5.11.14-200.fc33.x86_64",
  		"arch": "x86_64",
  		"cpu-desc": "AMD Ryzen 9 3900X 12-Core Processor",
  		"cpuid": "AuthenticAMD,23,113,0",
  		"nrcpus-online": 24,
  		"nrcpus-avail": 24,
  		"perf-version": "5.12.gee134f3189bd",
  		"cmdline": [
  			"/home/acme/bin/perf",
  			"record",
  			"sleep",
  			"0.1"
  		]
  	},
  	"samples": [
  		{
  			"timestamp": 170517539043684,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0xffffffffa6268827"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539048443,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0xffffffffa661359d"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539051018,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0xffffffffa6311e18"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539053652,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0x7fdb77b4812b",
  					"symbol": "_dl_start",
  					"dso": "ld-2.32.so"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539055306,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0xffffffffa6269286"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539057590,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0xffffffffa62abd8b"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539067559,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0x7fdb77b5e9e9",
  					"symbol": "__GI___tunables_init",
  					"dso": "ld-2.32.so"
  				}
  			]
  		},
  		{
  			"timestamp": 170517539282452,
  			"pid": 375844,
  			"tid": 375844,
  			"comm": "sleep",
  			"callchain": [
  				{
  					"ip": "0x7fdb779978d2",
  					"symbol": "getenv",
  					"dso": "libc-2.32.so"
  				}
  			]
  		}
  	]
  }
  $

Signed-off-by: Nicholas Fraser <nfraser@codeweavers.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Changbin Du <changbin.du@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tan Xiaojun <tanxiaojun@huawei.com>
Cc: Ulrich Czekalla <uczekalla@codeweavers.com>
Link: http://lore.kernel.org/lkml/3884969f-804d-2f53-c648-e2b0bd85edff@codeweavers.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Nicholas Fraser 2021-04-26 10:47:16 -04:00 committed by Arnaldo Carvalho de Melo
parent 5508c9dae2
commit d0713d4ca3
7 changed files with 418 additions and 21 deletions

View File

@ -17,7 +17,7 @@ Data file related processing.
COMMANDS
--------
convert::
Converts perf data file into another format (only CTF [1] format is support by now).
Converts perf data file into another format.
It's possible to set data-convert debug variable to get debug messages from conversion,
like:
perf --debug data-convert data convert ...
@ -27,6 +27,9 @@ OPTIONS for 'convert'
--to-ctf::
Triggers the CTF conversion, specify the path of CTF data directory.
--to-json::
Triggers JSON conversion. Specify the JSON filename to output.
--tod::
Convert time to wall clock time.

View File

@ -7,7 +7,6 @@
#include "debug.h"
#include <subcmd/parse-options.h>
#include "data-convert.h"
#include "data-convert-bt.h"
typedef int (*data_cmd_fn_t)(int argc, const char **argv);
@ -55,6 +54,7 @@ static const char * const data_convert_usage[] = {
static int cmd_data_convert(int argc, const char **argv)
{
const char *to_json = NULL;
const char *to_ctf = NULL;
struct perf_data_convert_opts opts = {
.force = false,
@ -63,6 +63,7 @@ static int cmd_data_convert(int argc, const char **argv)
const struct option options[] = {
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_STRING('i', "input", &input_name, "file", "input file name"),
OPT_STRING(0, "to-json", &to_json, NULL, "Convert to JSON format"),
#ifdef HAVE_LIBBABELTRACE_SUPPORT
OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"),
@ -72,11 +73,6 @@ static int cmd_data_convert(int argc, const char **argv)
OPT_END()
};
#ifndef HAVE_LIBBABELTRACE_SUPPORT
pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
return -1;
#endif
argc = parse_options(argc, argv, options,
data_convert_usage, 0);
if (argc) {
@ -84,11 +80,25 @@ static int cmd_data_convert(int argc, const char **argv)
return -1;
}
if (to_json && to_ctf) {
pr_err("You cannot specify both --to-ctf and --to-json.\n");
return -1;
}
if (!to_json && !to_ctf) {
pr_err("You must specify one of --to-ctf or --to-json.\n");
return -1;
}
if (to_json)
return bt_convert__perf2json(input_name, to_json, &opts);
if (to_ctf) {
#ifdef HAVE_LIBBABELTRACE_SUPPORT
return bt_convert__perf2ctf(input_name, to_ctf, &opts);
#else
pr_err("The libbabeltrace support is not compiled in.\n");
pr_err("The libbabeltrace support is not compiled in. perf should be "
"compiled with environment variables LIBBABELTRACE=1 and "
"LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
return -1;
#endif
}

View File

@ -165,6 +165,7 @@ perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
perf-y += data-convert-json.o
perf-y += scripting-engines/

View File

@ -21,7 +21,7 @@
#include <babeltrace/ctf/events.h>
#include <traceevent/event-parse.h>
#include "asm/bug.h"
#include "data-convert-bt.h"
#include "data-convert.h"
#include "session.h"
#include "debug.h"
#include "tool.h"

View File

@ -1,11 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __DATA_CONVERT_BT_H
#define __DATA_CONVERT_BT_H
#include "data-convert.h"
#ifdef HAVE_LIBBABELTRACE_SUPPORT
int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
struct perf_data_convert_opts *opts);
#endif /* HAVE_LIBBABELTRACE_SUPPORT */
#endif /* __DATA_CONVERT_BT_H */

View File

@ -0,0 +1,384 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* JSON export.
*
* Copyright (C) 2021, CodeWeavers Inc. <nfraser@codeweavers.com>
*/
#include "data-convert.h"
#include <fcntl.h>
#include <inttypes.h>
#include <sys/stat.h>
#include <unistd.h>
#include "linux/compiler.h"
#include "linux/err.h"
#include "util/auxtrace.h"
#include "util/debug.h"
#include "util/dso.h"
#include "util/event.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/header.h"
#include "util/map.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/tool.h"
// State shared by the JSON export callbacks for one conversion run.
struct convert_json {
// Event callbacks handed to the perf session; the callbacks use container_of()
// on this member to get back to the enclosing convert_json.
struct perf_tool tool;
// Stream the JSON document is written to.
FILE *out;
// True until the first sample has been written; decides whether a sample
// object must be preceded by a comma.
bool first;
// Number of samples processed so far, reported once the conversion finishes.
u64 events_count;
};
// Outputs s as a JSON string: surrounded by double quotes, with quotes,
// backslashes and control characters (0x00-0x1f) escaped as per RFC 8259.
// Bytes >= 0x80 (e.g. UTF-8 multi-byte sequences) are written through
// unchanged. s must be a non-NULL, NUL-terminated string.
static void output_json_string(FILE *out, const char *s)
{
	fputc('"', out);
	for (; *s; ++s) {
		// Look at the byte as unsigned: plain char may be signed, in which
		// case every byte >= 0x80 is negative, would compare <= 0x1f and be
		// emitted as a bogus sign-extended "\uffffffXX" escape.
		unsigned char ch = (unsigned char)*s;

		switch (ch) {
		// required escapes with special forms as per RFC 8259
		case '"':  fputs("\\\"", out); break;
		case '\\': fputs("\\\\", out); break;
		case '\b': fputs("\\b", out);  break;
		case '\f': fputs("\\f", out);  break;
		case '\n': fputs("\\n", out);  break;
		case '\r': fputs("\\r", out);  break;
		case '\t': fputs("\\t", out);  break;

		default:
			// all other control characters must be escaped by hex code
			if (ch <= 0x1f)
				fprintf(out, "\\u%04x", (unsigned int)ch);
			else
				fputc(ch, out);
			break;
		}
	}
	fputc('"', out);
}
// Starts a new line for the next JSON value: writes a comma first when the
// value follows a sibling, then a newline, then one tab per nesting level.
static void output_json_delimiters(FILE *out, bool comma, int depth)
{
	int remaining;

	fputs(comma ? ",\n" : "\n", out);

	for (remaining = depth; remaining > 0; --remaining)
		fputc('\t', out);
}
// Writes the delimiters for a new value and then the printf-style formatted
// text verbatim. The formatted text is not escaped, so the caller must supply
// something that is already valid JSON.
__printf(4, 5)
static void output_json_format(FILE *out, bool comma, int depth, const char *format, ...)
{
	va_list ap;

	output_json_delimiters(out, comma, depth);

	va_start(ap, format);
	vfprintf(out, format, ap);
	va_end(ap);
}
// Writes `"key": "value"` on a new line at the given depth. Both the key and
// the value are JSON-escaped; comma selects whether a separating comma is
// written before the pair.
static void output_json_key_string(FILE *out, bool comma, int depth,
				   const char *key, const char *value)
{
	output_json_delimiters(out, comma, depth);

	output_json_string(out, key);
	fputc(':', out);
	fputc(' ', out);
	output_json_string(out, value);
}
// Writes `"key": <value>` on a new line at the given depth, where <value> is
// produced from a printf-style format. Only the key is JSON-escaped; the
// formatted value is written verbatim.
__printf(5, 6)
static void output_json_key_format(FILE *out, bool comma, int depth,
				   const char *key, const char *format, ...)
{
	va_list ap;

	output_json_delimiters(out, comma, depth);
	output_json_string(out, key);
	fputc(':', out);
	fputc(' ', out);

	va_start(ap, format);
	vfprintf(out, format, ap);
	va_end(ap);
}
// Writes one callchain entry object at depth 4. The "ip" member is always
// present (as a quoted hex string). "symbol" is added when al carries a symbol
// with a non-empty name, and "dso" is added after it when the map's dso has a
// non-empty short name. al may be NULL.
static void output_sample_callchain_entry(struct perf_tool *tool,
					  u64 ip, struct addr_location *al)
{
	struct convert_json *c = container_of(tool, struct convert_json, tool);
	FILE *out = c->out;
	bool have_symbol = al && al->sym && al->sym->namelen;

	output_json_format(out, false, 4, "{");
	output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip);

	if (have_symbol) {
		const char *dso_name = NULL;

		output_json_key_string(out, true, 5, "symbol", al->sym->name);

		// The dso is only reported together with a symbol.
		if (al->map && al->map->dso)
			dso_name = al->map->dso->short_name;

		if (dso_name && dso_name[0] != '\0')
			output_json_key_string(out, true, 5, "dso", dso_name);
	}

	output_json_format(out, false, 4, "}");
}
// Sample callback: writes one object for the sample into the JSON output,
// holding timestamp, pid, tid, optional cpu, comm and a "callchain" array.
// Returns 0 on success, -1 when the sample cannot be resolved (in which case
// nothing is written for it).
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample,
				struct evsel *evsel __maybe_unused,
				struct machine *machine)
{
	struct convert_json *c = container_of(tool, struct convert_json, tool);
	FILE *out = c->out;
	struct addr_location al, tal;
	u8 cpumode = PERF_RECORD_MISC_USER;

	// NOTE(review): nothing in this function releases what machine__resolve()
	// stored in al - confirm whether a matching put is required.
	if (machine__resolve(machine, &al, sample) < 0) {
		pr_err("Sample resolution failed!\n");
		return -1;
	}

	++c->events_count;

	// Every sample object except the very first is preceded by a comma.
	if (!c->first)
		fputc(',', out);
	c->first = false;

	output_json_format(out, false, 2, "{");

	output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time);
	output_json_key_format(out, true, 3, "pid", "%i", al.thread->pid_);
	output_json_key_format(out, true, 3, "tid", "%i", al.thread->tid);

	if (al.thread->cpu >= 0)
		output_json_key_format(out, true, 3, "cpu", "%i", al.thread->cpu);

	output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread));

	output_json_key_format(out, true, 3, "callchain", "[");
	if (!sample->callchain) {
		// No recorded callchain: report the sample ip as the only entry.
		output_sample_callchain_entry(tool, sample->ip, &al);
	} else {
		unsigned int i;
		bool need_comma = false;

		for (i = 0; i < sample->callchain->nr; ++i) {
			u64 ip = sample->callchain->ips[i];
			bool resolved;

			// Values at or above PERF_CONTEXT_MAX are context markers, not
			// addresses: they select the cpumode used for the entries that
			// follow and produce no output of their own.
			if (ip >= PERF_CONTEXT_MAX) {
				if (ip == PERF_CONTEXT_HV)
					cpumode = PERF_RECORD_MISC_HYPERVISOR;
				else if (ip == PERF_CONTEXT_KERNEL)
					cpumode = PERF_RECORD_MISC_KERNEL;
				else if (ip == PERF_CONTEXT_USER)
					cpumode = PERF_RECORD_MISC_USER;
				else
					pr_debug("invalid callchain context: %" PRId64 "\n", (s64) ip);
				continue;
			}

			if (need_comma)
				fputc(',', out);
			need_comma = true;

			resolved = thread__find_symbol(al.thread, cpumode, ip, &tal);
			output_sample_callchain_entry(tool, ip, resolved ? &tal : NULL);
		}
	}
	output_json_format(out, false, 3, "]");

	output_json_format(out, false, 2, "}");
	return 0;
}
// Writes a string-valued header member at depth 2, preceded by a comma.
// A NULL value is skipped, because output_json_string() would dereference it.
static void output_header_string(FILE *out, const char *key, const char *value)
{
	if (value)
		output_json_key_string(out, true, 2, key, value);
}

// Writes the members of the "headers" object (the caller writes the enclosing
// braces): header version, the input file's mtime as "captured-on", data and
// feature offsets, the environment strings, cpu counts, optional clock data
// and the recorded command line.
static void output_headers(struct perf_session *session, struct convert_json *c)
{
	struct stat st;
	struct perf_header *header = &session->header;
	int fd = perf_data__fd(session->data);
	int i;
	FILE *out = c->out;

	// First member of the object, hence no leading comma.
	output_json_key_format(out, false, 2, "header-version", "%u", header->version);

	if (fstat(fd, &st) >= 0) {
		time_t mtime = st.st_mtime;
		struct tm *utc = gmtime(&mtime);
		char buf[256];

		// gmtime() may return NULL and strftime() may return 0; in both
		// cases buf holds nothing usable, so the member is left out.
		if (utc && strftime(buf, sizeof(buf), "%FT%TZ", utc) > 0)
			output_json_key_string(out, true, 2, "captured-on", buf);
		else
			pr_debug("Failed to format mtime of source file, not writing captured-on\n");
	} else {
		pr_debug("Failed to get mtime of source file, not writing captured-on\n");
	}

	output_json_key_format(out, true, 2, "data-offset", "%" PRIu64, header->data_offset);
	output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size);
	output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset);

	output_header_string(out, "hostname", header->env.hostname);
	output_header_string(out, "os-release", header->env.os_release);
	output_header_string(out, "arch", header->env.arch);

	output_header_string(out, "cpu-desc", header->env.cpu_desc);
	output_header_string(out, "cpuid", header->env.cpuid);
	output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online);
	output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail);

	if (header->env.clock.enabled) {
		output_json_key_format(out, true, 2, "clockid",
				"%u", header->env.clock.clockid);
		output_json_key_format(out, true, 2, "clock-time",
				"%" PRIu64, header->env.clock.clockid_ns);
		output_json_key_format(out, true, 2, "real-time",
				"%" PRIu64, header->env.clock.tod_ns);
	}

	output_header_string(out, "perf-version", header->env.version);

	output_json_key_format(out, true, 2, "cmdline", "[");
	for (i = 0; i < header->env.nr_cmdline; i++) {
		// Array elements after the first are separated by a comma.
		output_json_delimiters(out, i != 0, 3);
		output_json_string(out, header->env.cmdline_argv[i]);
	}
	output_json_format(out, false, 2, "]");
}
int bt_convert__perf2json(const char *input_name, const char *output_name,
struct perf_data_convert_opts *opts __maybe_unused)
{
struct perf_session *session;
int fd;
int ret = -1;
struct convert_json c = {
.tool = {
.sample = process_sample_event,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
.cgroup = perf_event__process_cgroup,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
.id_index = perf_event__process_id_index,
.auxtrace_info = perf_event__process_auxtrace_info,
.auxtrace = perf_event__process_auxtrace,
.event_update = perf_event__process_event_update,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
.first = true,
.events_count = 0,
};
struct perf_data data = {
.mode = PERF_DATA_MODE_READ,
.path = input_name,
.force = opts->force,
};
if (opts->all) {
pr_err("--all is currently unsupported for JSON output.\n");
goto err;
}
if (opts->tod) {
pr_err("--tod is currently unsupported for JSON output.\n");
goto err;
}
fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? O_TRUNC : O_EXCL), 0666);
if (fd == -1) {
if (errno == EEXIST)
pr_err("Output file exists. Use --force to overwrite it.\n");
else
pr_err("Error opening output file!\n");
goto err;
}
c.out = fdopen(fd, "w");
if (!c.out) {
fprintf(stderr, "Error opening output file!\n");
close(fd);
goto err;
}
session = perf_session__new(&data, false, &c.tool);
if (IS_ERR(session)) {
fprintf(stderr, "Error creating perf session!\n");
goto err_fclose;
}
if (symbol__init(&session->header.env) < 0) {
fprintf(stderr, "Symbol init error!\n");
goto err_session_delete;
}
// The opening brace is printed manually because it isn't delimited from a
// previous value (i.e. we don't want a leading newline)
fputc('{', c.out);
// Version number for future-proofing. Most additions should be able to be
// done in a backwards-compatible way so this should only need to be bumped
// if some major breaking change must be made.
output_json_format(c.out, false, 1, "\"linux-perf-json-version\": 1");
// Output headers
output_json_format(c.out, true, 1, "\"headers\": {");
output_headers(session, &c);
output_json_format(c.out, false, 1, "}");
// Output samples
output_json_format(c.out, true, 1, "\"samples\": [");
perf_session__process_events(session);
output_json_format(c.out, false, 1, "]");
output_json_format(c.out, false, 0, "}");
fputc('\n', c.out);
fprintf(stderr,
"[ perf data convert: Converted '%s' into JSON data '%s' ]\n",
data.path, output_name);
fprintf(stderr,
"[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n",
(ftell(c.out)) / 1024.0 / 1024.0, c.events_count);
ret = 0;
err_session_delete:
perf_session__delete(session);
err_fclose:
fclose(c.out);
err:
return ret;
}

View File

@ -2,10 +2,20 @@
#ifndef __DATA_CONVERT_H
#define __DATA_CONVERT_H
#include <stdbool.h>
/* Options passed from 'perf data convert' to the converters. */
struct perf_data_convert_opts {
/*
 * Forwarded to the input perf_data by the JSON converter, which also uses it
 * to decide whether an existing output file may be overwritten.
 */
bool force;
/* Rejected as unsupported by the JSON converter. */
bool all;
/* Convert time to wall clock time; rejected as unsupported by the JSON converter. */
bool tod;
};
#ifdef HAVE_LIBBABELTRACE_SUPPORT
int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
struct perf_data_convert_opts *opts);
#endif /* HAVE_LIBBABELTRACE_SUPPORT */
/*
 * Convert the perf data file input_name into JSON written to output_name.
 * Declared outside the HAVE_LIBBABELTRACE_SUPPORT guard, so it does not
 * depend on libbabeltrace.
 */
int bt_convert__perf2json(const char *input_name, const char *output_name,
			  struct perf_data_convert_opts *opts);
#endif /* __DATA_CONVERT_H */