perf tools: Add documentation for perf.data on disk format
Add some documentation for the on disk format of perf.data. This is not documenting the actual perf events -- which are documented in perf_event.h -- but just the additional headers that perf record adds around them when writing the data to disk. Signed-off-by: Andi Kleen <ak@linux.intel.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Link: http://lkml.kernel.org/r/1466800885-12974-1-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
d905768c9e
commit
d4897e1935
|
@ -0,0 +1,442 @@
|
|||
perf.data format
|
||||
|
||||
Up to date as of v4.7
|
||||
|
||||
This document describes the on-disk perf.data format, generated by perf record
|
||||
or perf inject and consumed by the other perf tools.
|
||||
|
||||
On a high level perf.data contains the events generated by the PMUs, plus metadata.
|
||||
|
||||
All fields are in native-endian of the machine that generated the perf.data.
|
||||
|
||||
When perf is writing to a pipe it uses a special version of the file
|
||||
format that does not rely on seeking to adjust data offsets. This
|
||||
format is not described here. The pipe version can be converted to
|
||||
normal perf.data with perf inject.
|
||||
|
||||
The file starts with a perf_header:
|
||||
|
||||
struct perf_header {
|
||||
char magic[8]; /* PERFILE2 */
|
||||
uint64_t size; /* size of the header */
|
||||
uint64_t attr_size; /* size of an attribute in attrs */
|
||||
struct perf_file_section attrs;
|
||||
struct perf_file_section data;
|
||||
struct perf_file_section event_types;
|
||||
uint64_t flags;
|
||||
uint64_t flags1[3];
|
||||
};
|
||||
|
||||
The magic number identifies the perf file and the version. Current perf versions
|
||||
use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1
|
||||
is not described here. The magic number also identifies the endian. When the
|
||||
magic value is 64bit byte swapped compared to the expected value, the file is in non-native
|
||||
endian.
|
||||
|
||||
A perf_file_section contains a pointer to another section of the perf file.
|
||||
The header contains three such pointers: for attributes, data and event types.
|
||||
|
||||
struct perf_file_section {
|
||||
uint64_t offset; /* offset from start of file */
|
||||
uint64_t size; /* size of the section */
|
||||
};
|
||||
|
||||
Flags section:
|
||||
|
||||
The header is followed by different optional headers, described by the bits set
|
||||
in flags. Only headers for which the bit is set are included. Each header
|
||||
consists of a perf_file_section located after the initial header.
|
||||
The respective perf_file_section points to the data of the additional
|
||||
header and defines its size.
|
||||
|
||||
Some headers consist of strings, which are defined like this:
|
||||
|
||||
struct perf_header_string {
|
||||
uint32_t len;
|
||||
char string[len]; /* zero terminated */
|
||||
};
|
||||
|
||||
Some headers consist of a sequence of strings, which start with a count (nr) followed by that many strings:
|
||||
|
||||
struct perf_header_string_list {
|
||||
uint32_t nr;
|
||||
struct perf_header_string strings[nr]; /* variable length records */
|
||||
};
|
||||
|
||||
The bits are the flags bits in a 256 bit bitmap starting with
|
||||
flags. These define the valid bits:
|
||||
|
||||
HEADER_RESERVED = 0, /* always cleared */
|
||||
HEADER_FIRST_FEATURE = 1,
|
||||
HEADER_TRACING_DATA = 1,
|
||||
|
||||
Describe me.
|
||||
|
||||
HEADER_BUILD_ID = 2,
|
||||
|
||||
The header consists of a sequence of build_id_event records. The size of each record
|
||||
is defined by header.size (see perf_event.h). Each event defines an ELF build id
|
||||
for an executable file name for a pid. An ELF build id is a unique identifier
|
||||
assigned by the linker to an executable.
|
||||
|
||||
struct build_id_event {
|
||||
struct perf_event_header header;
|
||||
pid_t pid;
|
||||
uint8_t build_id[24];
|
||||
char filename[header.size - offsetof(struct build_id_event, filename)];
|
||||
};
|
||||
|
||||
HEADER_HOSTNAME = 3,
|
||||
|
||||
A perf_header_string with the hostname where the data was collected
|
||||
(uname -n)
|
||||
|
||||
HEADER_OSRELEASE = 4,
|
||||
|
||||
A perf_header_string with the os release where the data was collected
|
||||
(uname -r)
|
||||
|
||||
HEADER_VERSION = 5,
|
||||
|
||||
A perf_header_string with the perf user tool version where the
|
||||
data was collected. This is the same as the version of the source tree
|
||||
the perf tool was built from.
|
||||
|
||||
HEADER_ARCH = 6,
|
||||
|
||||
A perf_header_string with the CPU architecture (uname -m)
|
||||
|
||||
HEADER_NRCPUS = 7,
|
||||
|
||||
A structure defining the number of CPUs.
|
||||
|
||||
struct nr_cpus {
|
||||
uint32_t nr_cpus_online;
|
||||
uint32_t nr_cpus_available; /* CPUs not yet onlined */
|
||||
};
|
||||
|
||||
HEADER_CPUDESC = 8,
|
||||
|
||||
A perf_header_string with description of the CPU. On x86 this is the model name
|
||||
in /proc/cpuinfo
|
||||
|
||||
HEADER_CPUID = 9,
|
||||
|
||||
A perf_header_string with the exact CPU type. On x86 this is
|
||||
vendor,family,model,stepping. For example: GenuineIntel,6,69,1
|
||||
|
||||
HEADER_TOTAL_MEM = 10,
|
||||
|
||||
A uint64_t with the total memory in bytes.
|
||||
|
||||
HEADER_CMDLINE = 11,
|
||||
|
||||
A perf_header_string with the perf command line used to collect the data.
|
||||
|
||||
HEADER_EVENT_DESC = 12,
|
||||
|
||||
Another description of the perf_event_attrs, more detailed than header.attrs
|
||||
including IDs and names. See perf_event.h or the man page for a description
|
||||
of a struct perf_event_attr.
|
||||
|
||||
struct {
|
||||
uint32_t nr; /* number of events */
|
||||
uint32_t attr_size; /* size of each perf_event_attr */
|
||||
struct {
|
||||
struct perf_event_attr attr; /* size of attr_size */
|
||||
uint32_t nr_ids;
|
||||
struct perf_header_string event_string;
|
||||
uint64_t ids[nr_ids];
|
||||
} events[nr]; /* Variable length records */
|
||||
};
|
||||
|
||||
HEADER_CPU_TOPOLOGY = 13,
|
||||
|
||||
String lists defining the core and CPU threads topology.
|
||||
|
||||
struct {
|
||||
struct perf_header_string_list cores; /* Variable length */
|
||||
struct perf_header_string_list threads; /* Variable length */
|
||||
};
|
||||
|
||||
Example:
|
||||
sibling cores : 0-3
|
||||
sibling threads : 0-1
|
||||
sibling threads : 2-3
|
||||
|
||||
HEADER_NUMA_TOPOLOGY = 14,
|
||||
|
||||
A list of NUMA node descriptions
|
||||
|
||||
struct {
|
||||
uint32_t nr;
|
||||
struct {
|
||||
uint32_t nodenr;
|
||||
uint64_t mem_total;
|
||||
uint64_t mem_free;
|
||||
struct perf_header_string cpus;
|
||||
} nodes[nr]; /* Variable length records */
|
||||
};
|
||||
|
||||
HEADER_BRANCH_STACK = 15,
|
||||
|
||||
Not implemented in perf.
|
||||
|
||||
HEADER_PMU_MAPPINGS = 16,
|
||||
|
||||
A list of PMU structures, defining the different PMUs supported by perf.
|
||||
|
||||
struct {
|
||||
uint32_t nr;
|
||||
struct pmu {
|
||||
uint32_t pmu_type;
|
||||
struct perf_header_string pmu_name;
|
||||
} [nr]; /* Variable length records */
|
||||
};
|
||||
|
||||
HEADER_GROUP_DESC = 17,
|
||||
|
||||
Description of counter groups ({...} in perf syntax)
|
||||
|
||||
struct {
|
||||
uint32_t nr;
|
||||
struct {
|
||||
struct perf_header_string string;
|
||||
uint32_t leader_idx;
|
||||
uint32_t nr_members;
|
||||
} [nr]; /* Variable length records */
|
||||
};
|
||||
|
||||
HEADER_AUXTRACE = 18,
|
||||
|
||||
Define additional auxtrace areas in the perf.data. auxtrace is used to store
|
||||
undecoded hardware tracing information, such as Intel Processor Trace data.
|
||||
|
||||
/**
|
||||
* struct auxtrace_index_entry - indexes a AUX area tracing event within a
|
||||
* perf.data file.
|
||||
* @file_offset: offset within the perf.data file
|
||||
* @sz: size of the event
|
||||
*/
|
||||
struct auxtrace_index_entry {
|
||||
u64 file_offset;
|
||||
u64 sz;
|
||||
};
|
||||
|
||||
#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
|
||||
|
||||
/**
|
||||
* struct auxtrace_index - index of AUX area tracing events within a perf.data
|
||||
* file.
|
||||
* @list: linking a number of arrays of entries
|
||||
* @nr: number of entries
|
||||
* @entries: array of entries
|
||||
*/
|
||||
struct auxtrace_index {
|
||||
struct list_head list;
|
||||
size_t nr;
|
||||
struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
|
||||
};
|
||||
|
||||
other bits are reserved and should be ignored for now
|
||||
HEADER_FEAT_BITS = 256,
|
||||
|
||||
Attributes
|
||||
|
||||
This is an array of perf_event_attrs, each attr_size bytes long, which defines
|
||||
each event collected. See perf_event.h or the man page for a detailed
|
||||
description.
|
||||
|
||||
Data
|
||||
|
||||
This section is the bulk of the file. It consists of a stream of perf_events
|
||||
describing events. This matches the format generated by the kernel.
|
||||
See perf_event.h or the manpage for a detailed description.
|
||||
|
||||
Some notes on parsing:
|
||||
|
||||
Ordering
|
||||
|
||||
The events are not necessarily in time stamp order, as they can be
|
||||
collected in parallel on different CPUs. If the events should be
|
||||
processed in time order they need to be sorted first. It is possible
|
||||
to only do a partial sort using the FINISHED_ROUND event header (see
|
||||
below). perf record guarantees that there is no reordering over a
|
||||
FINISHED_ROUND.
|
||||
|
||||
ID vs IDENTIFIER
|
||||
|
||||
When the event stream contains multiple events each event is identified
|
||||
by an ID. This can be either through the PERF_SAMPLE_ID or the
|
||||
PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
|
||||
at a fixed offset from the event header, which allows reliable
|
||||
parsing of the header. Relying on ID may be ambiguous.
|
||||
IDENTIFIER is only supported by newer Linux kernels.
|
||||
|
||||
Perf record specific events:
|
||||
|
||||
In addition to the kernel generated event types perf record adds its
|
||||
own event types (in addition it also synthesizes some kernel events,
|
||||
for example MMAP events)
|
||||
|
||||
PERF_RECORD_USER_TYPE_START = 64,
|
||||
PERF_RECORD_HEADER_ATTR = 64,
|
||||
|
||||
struct attr_event {
|
||||
struct perf_event_header header;
|
||||
struct perf_event_attr attr;
|
||||
uint64_t id[];
|
||||
};
|
||||
|
||||
PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */
|
||||
|
||||
#define MAX_EVENT_NAME 64
|
||||
|
||||
struct perf_trace_event_type {
|
||||
uint64_t event_id;
|
||||
char name[MAX_EVENT_NAME];
|
||||
};
|
||||
|
||||
struct event_type_event {
|
||||
struct perf_event_header header;
|
||||
struct perf_trace_event_type event_type;
|
||||
};
|
||||
|
||||
|
||||
PERF_RECORD_HEADER_TRACING_DATA = 66,
|
||||
|
||||
Describe me
|
||||
|
||||
struct tracing_data_event {
|
||||
struct perf_event_header header;
|
||||
uint32_t size;
|
||||
};
|
||||
|
||||
PERF_RECORD_HEADER_BUILD_ID = 67,
|
||||
|
||||
Define an ELF build ID for a referenced executable.
|
||||
|
||||
struct build_id_event; /* See above */
|
||||
|
||||
PERF_RECORD_FINISHED_ROUND = 68,
|
||||
|
||||
No event reordering over this header. No payload.
|
||||
|
||||
PERF_RECORD_ID_INDEX = 69,
|
||||
|
||||
Map event ids to CPUs and TIDs.
|
||||
|
||||
struct id_index_entry {
|
||||
uint64_t id;
|
||||
uint64_t idx;
|
||||
uint64_t cpu;
|
||||
uint64_t tid;
|
||||
};
|
||||
|
||||
struct id_index_event {
|
||||
struct perf_event_header header;
|
||||
uint64_t nr;
|
||||
struct id_index_entry entries[nr];
|
||||
};
|
||||
|
||||
PERF_RECORD_AUXTRACE_INFO = 70,
|
||||
|
||||
Auxtrace type specific information. Describe me
|
||||
|
||||
struct auxtrace_info_event {
|
||||
struct perf_event_header header;
|
||||
uint32_t type;
|
||||
uint32_t reserved__; /* For alignment */
|
||||
uint64_t priv[];
|
||||
};
|
||||
|
||||
PERF_RECORD_AUXTRACE = 71,
|
||||
|
||||
Defines auxtrace data. Followed by the actual data. The contents of
|
||||
the auxtrace data are dependent on the event and the CPU. For example
|
||||
for Intel Processor Trace it contains Processor Trace data generated
|
||||
by the CPU.
|
||||
|
||||
struct auxtrace_event {
|
||||
struct perf_event_header header;
|
||||
uint64_t size;
|
||||
uint64_t offset;
|
||||
uint64_t reference;
|
||||
uint32_t idx;
|
||||
uint32_t tid;
|
||||
uint32_t cpu;
|
||||
uint32_t reserved__; /* For alignment */
|
||||
};
|
||||
|
||||
struct aux_event {
|
||||
struct perf_event_header header;
|
||||
uint64_t aux_offset;
|
||||
uint64_t aux_size;
|
||||
uint64_t flags;
|
||||
};
|
||||
|
||||
PERF_RECORD_AUXTRACE_ERROR = 72,
|
||||
|
||||
Describes an error in hardware tracing
|
||||
|
||||
enum auxtrace_error_type {
|
||||
PERF_AUXTRACE_ERROR_ITRACE = 1,
|
||||
PERF_AUXTRACE_ERROR_MAX
|
||||
};
|
||||
|
||||
#define MAX_AUXTRACE_ERROR_MSG 64
|
||||
|
||||
struct auxtrace_error_event {
|
||||
struct perf_event_header header;
|
||||
uint32_t type;
|
||||
uint32_t code;
|
||||
uint32_t cpu;
|
||||
uint32_t pid;
|
||||
uint32_t tid;
|
||||
uint32_t reserved__; /* For alignment */
|
||||
uint64_t ip;
|
||||
char msg[MAX_AUXTRACE_ERROR_MSG];
|
||||
};
|
||||
|
||||
Event types
|
||||
|
||||
Define the event attributes with their IDs.
|
||||
|
||||
An array bound by the perf_file_section size.
|
||||
|
||||
struct {
|
||||
struct perf_event_attr attr; /* Size defined by header.attr_size */
|
||||
struct perf_file_section ids;
|
||||
}
|
||||
|
||||
ids points to an array of uint64_t defining the ids for event attr attr.
|
||||
|
||||
References:
|
||||
|
||||
include/uapi/linux/perf_event.h
|
||||
|
||||
This is the canonical description of the kernel generated perf_events
|
||||
and the perf_event_attrs.
|
||||
|
||||
perf_events manpage
|
||||
|
||||
A manpage describing perf_event and perf_event_attr is here:
|
||||
http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html
|
||||
This tends to be slightly behind the kernel include, but has better
|
||||
descriptions. A (typically older) version of the man page may be
|
||||
included with the standard Linux man pages, available with "man
|
||||
perf_events"
|
||||
|
||||
pmu-tools
|
||||
|
||||
https://github.com/andikleen/pmu-tools/tree/master/parser
|
||||
|
||||
A definition of the perf.data format in python "construct" format is available
|
||||
in pmu-tools parser. This allows reading perf.data from python and dumping it.
|
||||
|
||||
quipper
|
||||
|
||||
The quipper C++ parser is available at
|
||||
https://chromium.googlesource.com/chromiumos/platform/chromiumos-wide-profiling/
|
||||
Unfortunately this parser tends to be many versions behind and may not be able
|
||||
to parse data files generated by recent perf.
|
Loading…
Reference in New Issue